|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.998769987699877, |
|
"eval_steps": 500, |
|
"global_step": 6500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15375153751537515, |
|
"grad_norm": 1.2622125148773193, |
|
"learning_rate": 1.8471709717097174e-05, |
|
"loss": 0.909, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3075030750307503, |
|
"grad_norm": 0.7884178161621094, |
|
"learning_rate": 1.693419434194342e-05, |
|
"loss": 0.2141, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4612546125461255, |
|
"grad_norm": 0.774986743927002, |
|
"learning_rate": 1.5396678966789668e-05, |
|
"loss": 0.1649, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6150061500615006, |
|
"grad_norm": 0.8079761266708374, |
|
"learning_rate": 1.3859163591635918e-05, |
|
"loss": 0.144, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7687576875768758, |
|
"grad_norm": 0.4618232846260071, |
|
"learning_rate": 1.2321648216482165e-05, |
|
"loss": 0.1384, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.922509225092251, |
|
"grad_norm": 0.5913366675376892, |
|
"learning_rate": 1.0784132841328414e-05, |
|
"loss": 0.1315, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 62.5775, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.10974862426519394, |
|
"eval_runtime": 129.6407, |
|
"eval_samples_per_second": 100.331, |
|
"eval_steps_per_second": 6.271, |
|
"step": 3252 |
|
}, |
|
{ |
|
"epoch": 1.0762607626076262, |
|
"grad_norm": 0.4670741558074951, |
|
"learning_rate": 9.246617466174663e-06, |
|
"loss": 0.126, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.2300123001230012, |
|
"grad_norm": 0.44761019945144653, |
|
"learning_rate": 7.709102091020911e-06, |
|
"loss": 0.1264, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.3837638376383765, |
|
"grad_norm": 0.4738340675830841, |
|
"learning_rate": 6.171586715867159e-06, |
|
"loss": 0.124, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.5375153751537516, |
|
"grad_norm": 0.7554705739021301, |
|
"learning_rate": 4.634071340713408e-06, |
|
"loss": 0.1221, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.6912669126691267, |
|
"grad_norm": 0.39538097381591797, |
|
"learning_rate": 3.0965559655596562e-06, |
|
"loss": 0.1238, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.8450184501845017, |
|
"grad_norm": 0.5276034474372864, |
|
"learning_rate": 1.5621156211562116e-06, |
|
"loss": 0.1208, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.998769987699877, |
|
"grad_norm": 0.4468577802181244, |
|
"learning_rate": 2.4600246002460026e-08, |
|
"loss": 0.1227, |
|
"step": 6500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6504, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3518616154669056.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|