|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.2039775624681285, |
|
"eval_steps": 9, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002039775624681285, |
|
"eval_loss": 2.7500362396240234, |
|
"eval_runtime": 4.8128, |
|
"eval_samples_per_second": 171.624, |
|
"eval_steps_per_second": 21.609, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.006119326874043855, |
|
"grad_norm": 119.72549438476562, |
|
"learning_rate": 3e-05, |
|
"loss": 10.7627, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01223865374808771, |
|
"grad_norm": 95.31661987304688, |
|
"learning_rate": 6e-05, |
|
"loss": 10.7585, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.018357980622131564, |
|
"grad_norm": 111.39752960205078, |
|
"learning_rate": 9e-05, |
|
"loss": 10.8945, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.018357980622131564, |
|
"eval_loss": 2.742156505584717, |
|
"eval_runtime": 4.8217, |
|
"eval_samples_per_second": 171.311, |
|
"eval_steps_per_second": 21.569, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02447730749617542, |
|
"grad_norm": 87.65267944335938, |
|
"learning_rate": 9.987820251299122e-05, |
|
"loss": 11.0519, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.030596634370219276, |
|
"grad_norm": 95.65772247314453, |
|
"learning_rate": 9.924038765061042e-05, |
|
"loss": 10.8906, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03671596124426313, |
|
"grad_norm": 92.90438079833984, |
|
"learning_rate": 9.806308479691595e-05, |
|
"loss": 10.9398, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.03671596124426313, |
|
"eval_loss": 2.678375720977783, |
|
"eval_runtime": 4.8305, |
|
"eval_samples_per_second": 170.996, |
|
"eval_steps_per_second": 21.53, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04283528811830699, |
|
"grad_norm": 93.3251724243164, |
|
"learning_rate": 9.635919272833938e-05, |
|
"loss": 10.3163, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.04895461499235084, |
|
"grad_norm": 105.36555480957031, |
|
"learning_rate": 9.414737964294636e-05, |
|
"loss": 10.66, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.05507394186639469, |
|
"grad_norm": 104.30107116699219, |
|
"learning_rate": 9.145187862775209e-05, |
|
"loss": 10.3486, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.05507394186639469, |
|
"eval_loss": 2.6385228633880615, |
|
"eval_runtime": 4.8602, |
|
"eval_samples_per_second": 169.95, |
|
"eval_steps_per_second": 21.398, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.06119326874043855, |
|
"grad_norm": 153.6848602294922, |
|
"learning_rate": 8.83022221559489e-05, |
|
"loss": 10.7138, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06731259561448241, |
|
"grad_norm": 126.95474243164062, |
|
"learning_rate": 8.473291852294987e-05, |
|
"loss": 10.0417, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.07343192248852626, |
|
"grad_norm": 170.2035369873047, |
|
"learning_rate": 8.07830737662829e-05, |
|
"loss": 10.5315, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.07343192248852626, |
|
"eval_loss": 2.6369853019714355, |
|
"eval_runtime": 4.8791, |
|
"eval_samples_per_second": 169.294, |
|
"eval_steps_per_second": 21.315, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.07955124936257012, |
|
"grad_norm": 132.4921875, |
|
"learning_rate": 7.649596321166024e-05, |
|
"loss": 10.5915, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.08567057623661398, |
|
"grad_norm": 109.94354248046875, |
|
"learning_rate": 7.191855733945387e-05, |
|
"loss": 10.4331, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.09178990311065782, |
|
"grad_norm": 100.11196899414062, |
|
"learning_rate": 6.710100716628344e-05, |
|
"loss": 10.891, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09178990311065782, |
|
"eval_loss": 2.6252143383026123, |
|
"eval_runtime": 4.8716, |
|
"eval_samples_per_second": 169.555, |
|
"eval_steps_per_second": 21.348, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09790922998470168, |
|
"grad_norm": 112.04855346679688, |
|
"learning_rate": 6.209609477998338e-05, |
|
"loss": 9.8393, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.10402855685874554, |
|
"grad_norm": 119.39227294921875, |
|
"learning_rate": 5.695865504800327e-05, |
|
"loss": 10.1454, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.11014788373278939, |
|
"grad_norm": 202.7808837890625, |
|
"learning_rate": 5.174497483512506e-05, |
|
"loss": 10.4947, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.11014788373278939, |
|
"eval_loss": 2.626685857772827, |
|
"eval_runtime": 4.8065, |
|
"eval_samples_per_second": 171.849, |
|
"eval_steps_per_second": 21.637, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.11626721060683325, |
|
"grad_norm": 114.66297149658203, |
|
"learning_rate": 4.6512176312793736e-05, |
|
"loss": 10.7594, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.1223865374808771, |
|
"grad_norm": 120.82008361816406, |
|
"learning_rate": 4.131759111665349e-05, |
|
"loss": 10.5164, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12850586435492095, |
|
"grad_norm": 128.4127197265625, |
|
"learning_rate": 3.6218132209150045e-05, |
|
"loss": 10.219, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.12850586435492095, |
|
"eval_loss": 2.6237473487854004, |
|
"eval_runtime": 4.8342, |
|
"eval_samples_per_second": 170.867, |
|
"eval_steps_per_second": 21.513, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.13462519122896482, |
|
"grad_norm": 99.31004333496094, |
|
"learning_rate": 3.12696703292044e-05, |
|
"loss": 10.3523, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.14074451810300867, |
|
"grad_norm": 106.18312072753906, |
|
"learning_rate": 2.6526421860705473e-05, |
|
"loss": 10.6466, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.14686384497705252, |
|
"grad_norm": 143.47616577148438, |
|
"learning_rate": 2.2040354826462668e-05, |
|
"loss": 10.473, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.14686384497705252, |
|
"eval_loss": 2.621034622192383, |
|
"eval_runtime": 4.8122, |
|
"eval_samples_per_second": 171.646, |
|
"eval_steps_per_second": 21.612, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.1529831718510964, |
|
"grad_norm": 104.74443054199219, |
|
"learning_rate": 1.7860619515673033e-05, |
|
"loss": 10.524, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.15910249872514023, |
|
"grad_norm": 114.83618927001953, |
|
"learning_rate": 1.4033009983067452e-05, |
|
"loss": 10.5378, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.16522182559918408, |
|
"grad_norm": 140.494140625, |
|
"learning_rate": 1.0599462319663905e-05, |
|
"loss": 10.4252, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.16522182559918408, |
|
"eval_loss": 2.618023157119751, |
|
"eval_runtime": 4.8413, |
|
"eval_samples_per_second": 170.617, |
|
"eval_steps_per_second": 21.482, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.17134115247322795, |
|
"grad_norm": 105.02456665039062, |
|
"learning_rate": 7.597595192178702e-06, |
|
"loss": 10.2756, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.1774604793472718, |
|
"grad_norm": 115.14602661132812, |
|
"learning_rate": 5.060297685041659e-06, |
|
"loss": 10.1504, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.18357980622131564, |
|
"grad_norm": 102.13778686523438, |
|
"learning_rate": 3.0153689607045845e-06, |
|
"loss": 10.736, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.18357980622131564, |
|
"eval_loss": 2.619086742401123, |
|
"eval_runtime": 4.8863, |
|
"eval_samples_per_second": 169.043, |
|
"eval_steps_per_second": 21.284, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.18969913309535952, |
|
"grad_norm": 105.03943634033203, |
|
"learning_rate": 1.4852136862001764e-06, |
|
"loss": 10.5, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.19581845996940336, |
|
"grad_norm": 116.5811767578125, |
|
"learning_rate": 4.865965629214819e-07, |
|
"loss": 10.7438, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.2019377868434472, |
|
"grad_norm": 120.85993957519531, |
|
"learning_rate": 3.04586490452119e-08, |
|
"loss": 10.4505, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.2019377868434472, |
|
"eval_loss": 2.619232177734375, |
|
"eval_runtime": 4.8553, |
|
"eval_samples_per_second": 170.123, |
|
"eval_steps_per_second": 21.42, |
|
"step": 99 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 9, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3154709430927360.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|