File size: 3,230 Bytes
69801c1 f93d491 69801c1 f93d491 69801c1 7051856 69801c1 a72edac 25241dc b902a6a 779f68e 5dde5f1 d46a7fc e7fd55f 8d0a493 2a5af68 426e64f f783eea f93d491 69801c1 7051856 69801c1 f93d491 69801c1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.3507896924355778,
"eval_steps": 500,
"global_step": 6500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10390689941812137,
"grad_norm": 3.488266944885254,
"learning_rate": 1.9311443613189252e-05,
"loss": 0.8436,
"step": 500
},
{
"epoch": 0.20781379883624274,
"grad_norm": 0.8393851518630981,
"learning_rate": 1.8618730950401776e-05,
"loss": 0.5316,
"step": 1000
},
{
"epoch": 0.3117206982543641,
"grad_norm": 1.4351526498794556,
"learning_rate": 1.79260182876143e-05,
"loss": 0.483,
"step": 1500
},
{
"epoch": 0.41562759767248547,
"grad_norm": 1.4630037546157837,
"learning_rate": 1.7233305624826823e-05,
"loss": 0.4516,
"step": 2000
},
{
"epoch": 0.5195344970906068,
"grad_norm": 0.854028046131134,
"learning_rate": 1.6540592962039347e-05,
"loss": 0.4387,
"step": 2500
},
{
"epoch": 0.6234413965087282,
"grad_norm": 0.8339985013008118,
"learning_rate": 1.584788029925187e-05,
"loss": 0.4192,
"step": 3000
},
{
"epoch": 0.7273482959268496,
"grad_norm": 1.0237222909927368,
"learning_rate": 1.5155167636464397e-05,
"loss": 0.422,
"step": 3500
},
{
"epoch": 0.8312551953449709,
"grad_norm": 1.0825960636138916,
"learning_rate": 1.446245497367692e-05,
"loss": 0.4065,
"step": 4000
},
{
"epoch": 0.9351620947630923,
"grad_norm": 1.4909119606018066,
"learning_rate": 1.3769742310889445e-05,
"loss": 0.4031,
"step": 4500
},
{
"epoch": 1.0,
"eval_loss": 0.3220335841178894,
"eval_runtime": 31.3207,
"eval_samples_per_second": 15.772,
"eval_steps_per_second": 7.886,
"step": 4812
},
{
"epoch": 1.0390689941812137,
"grad_norm": 0.6415828466415405,
"learning_rate": 1.3077029648101969e-05,
"loss": 0.4064,
"step": 5000
},
{
"epoch": 1.142975893599335,
"grad_norm": 0.6136273741722107,
"learning_rate": 1.2384316985314493e-05,
"loss": 0.3599,
"step": 5500
},
{
"epoch": 1.2468827930174564,
"grad_norm": 0.49429360032081604,
"learning_rate": 1.1692989747852592e-05,
"loss": 0.3737,
"step": 6000
},
{
"epoch": 1.3507896924355778,
"grad_norm": 1.103445053100586,
"learning_rate": 1.1000277085065116e-05,
"loss": 0.3626,
"step": 6500
}
],
"logging_steps": 500,
"max_steps": 14436,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 7915843619389440.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|