File size: 3,411 Bytes
0bedf29 2c60a6c 0bedf29 2c60a6c 0bedf29 ce26961 23e223e b91ab65 7971b82 6f1fe97 7971b82 0310648 b03d824 84251f4 939d39c c69fadc 6f1fe97 c69fadc 98bde4b a184528 8fd4bee 0bedf29 2c60a6c 0bedf29 2c60a6c 0bedf29 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 6948,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2158894645941278,
"grad_norm": 1.0430593490600586,
"learning_rate": 1.856649395509499e-05,
"loss": 0.8043,
"step": 500
},
{
"epoch": 0.4317789291882556,
"grad_norm": 0.8806478977203369,
"learning_rate": 1.7127230857800806e-05,
"loss": 0.4945,
"step": 1000
},
{
"epoch": 0.6476683937823834,
"grad_norm": 0.49951252341270447,
"learning_rate": 1.568796776050662e-05,
"loss": 0.4456,
"step": 1500
},
{
"epoch": 0.8635578583765112,
"grad_norm": 0.8921486139297485,
"learning_rate": 1.4248704663212436e-05,
"loss": 0.4194,
"step": 2000
},
{
"epoch": 1.0,
"eval_loss": 0.33670297265052795,
"eval_runtime": 28.3351,
"eval_samples_per_second": 16.658,
"eval_steps_per_second": 4.164,
"step": 2316
},
{
"epoch": 1.079447322970639,
"grad_norm": 3.1095924377441406,
"learning_rate": 1.280944156591825e-05,
"loss": 0.4036,
"step": 2500
},
{
"epoch": 1.2953367875647668,
"grad_norm": 1.0692085027694702,
"learning_rate": 1.1370178468624064e-05,
"loss": 0.3879,
"step": 3000
},
{
"epoch": 1.5112262521588946,
"grad_norm": 0.9439179301261902,
"learning_rate": 9.93091537132988e-06,
"loss": 0.3728,
"step": 3500
},
{
"epoch": 1.7271157167530224,
"grad_norm": 0.9635007381439209,
"learning_rate": 8.491652274035695e-06,
"loss": 0.3675,
"step": 4000
},
{
"epoch": 1.9430051813471503,
"grad_norm": 1.1892434358596802,
"learning_rate": 7.052389176741509e-06,
"loss": 0.362,
"step": 4500
},
{
"epoch": 2.0,
"eval_loss": 0.3095574975013733,
"eval_runtime": 28.3225,
"eval_samples_per_second": 16.665,
"eval_steps_per_second": 4.166,
"step": 4632
},
{
"epoch": 2.158894645941278,
"grad_norm": 0.6397312879562378,
"learning_rate": 5.613126079447323e-06,
"loss": 0.3467,
"step": 5000
},
{
"epoch": 2.3747841105354057,
"grad_norm": 0.6125323176383972,
"learning_rate": 4.173862982153138e-06,
"loss": 0.3513,
"step": 5500
},
{
"epoch": 2.5906735751295336,
"grad_norm": 0.5765931606292725,
"learning_rate": 2.734599884858952e-06,
"loss": 0.3428,
"step": 6000
},
{
"epoch": 2.8065630397236614,
"grad_norm": 0.8941567540168762,
"learning_rate": 1.2982153137593554e-06,
"loss": 0.353,
"step": 6500
}
],
"logging_steps": 500,
"max_steps": 6948,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.692233082077184e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|