File size: 2,388 Bytes
9c5668e b891330 9c5668e b891330 9c5668e b891330 9c5668e b891330 9c5668e b891330 9c5668e b891330 9c5668e b891330 9c5668e b891330 9c5668e b891330 9c5668e b891330 9c5668e b891330 9c5668e b891330 9c5668e b891330 9c5668e b891330 9c5668e b891330 9c5668e b891330 9c5668e b891330 9c5668e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 500,
"global_step": 2060,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.4854368932038835,
"grad_norm": 0.9415706992149353,
"learning_rate": 8.794946550048592e-05,
"loss": 0.8636,
"step": 250
},
{
"epoch": 0.970873786407767,
"grad_norm": 0.6905472278594971,
"learning_rate": 7.580174927113704e-05,
"loss": 0.4257,
"step": 500
},
{
"epoch": 1.4563106796116505,
"grad_norm": 0.6437392234802246,
"learning_rate": 6.365403304178815e-05,
"loss": 0.3792,
"step": 750
},
{
"epoch": 1.941747572815534,
"grad_norm": 0.6311036348342896,
"learning_rate": 5.150631681243926e-05,
"loss": 0.3628,
"step": 1000
},
{
"epoch": 2.4271844660194173,
"grad_norm": 0.6062882542610168,
"learning_rate": 3.9358600583090386e-05,
"loss": 0.3511,
"step": 1250
},
{
"epoch": 2.912621359223301,
"grad_norm": 0.6469098925590515,
"learning_rate": 2.72108843537415e-05,
"loss": 0.3425,
"step": 1500
},
{
"epoch": 3.3980582524271843,
"grad_norm": 0.6484191417694092,
"learning_rate": 1.5063168124392615e-05,
"loss": 0.329,
"step": 1750
},
{
"epoch": 3.883495145631068,
"grad_norm": 0.6347299218177795,
"learning_rate": 2.915451895043732e-06,
"loss": 0.3209,
"step": 2000
},
{
"epoch": 4.0,
"step": 2060,
"total_flos": 1.4445804612483994e+18,
"train_loss": 0.4188447378214123,
"train_runtime": 30284.0025,
"train_samples_per_second": 17.413,
"train_steps_per_second": 0.068
}
],
"logging_steps": 250,
"max_steps": 2060,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.4445804612483994e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
|