|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 29.333333333333332, |
|
"eval_steps": 500, |
|
"global_step": 30, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 1.4717, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 1.4717, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 1.4497, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 8.888888888888889e-05, |
|
"loss": 1.4049, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 0.00011111111111111112, |
|
"loss": 1.3354, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 1.2413, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 0.00015555555555555556, |
|
"loss": 1.1378, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 0.00017777777777777779, |
|
"loss": 1.0309, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9263, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 0.00019753086419753085, |
|
"loss": 0.8314, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 10.33, |
|
"learning_rate": 0.00019506172839506175, |
|
"loss": 0.7409, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"learning_rate": 0.0001925925925925926, |
|
"loss": 0.6599, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"learning_rate": 0.00019012345679012346, |
|
"loss": 0.581, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 0.00018765432098765433, |
|
"loss": 0.4985, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 14.33, |
|
"learning_rate": 0.0001851851851851852, |
|
"loss": 0.4184, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"learning_rate": 0.00018271604938271605, |
|
"loss": 0.3421, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 16.33, |
|
"learning_rate": 0.00018024691358024692, |
|
"loss": 0.271, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 17.33, |
|
"learning_rate": 0.00017777777777777779, |
|
"loss": 0.2084, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 18.33, |
|
"learning_rate": 0.00017530864197530866, |
|
"loss": 0.1555, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 19.33, |
|
"learning_rate": 0.0001728395061728395, |
|
"loss": 0.1131, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 20.33, |
|
"learning_rate": 0.00017037037037037037, |
|
"loss": 0.0872, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 21.33, |
|
"learning_rate": 0.00016790123456790124, |
|
"loss": 0.0698, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 22.33, |
|
"learning_rate": 0.0001654320987654321, |
|
"loss": 0.0578, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 23.33, |
|
"learning_rate": 0.00016296296296296295, |
|
"loss": 0.0485, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 24.33, |
|
"learning_rate": 0.00016049382716049385, |
|
"loss": 0.0412, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 25.33, |
|
"learning_rate": 0.0001580246913580247, |
|
"loss": 0.0327, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 26.33, |
|
"learning_rate": 0.00015555555555555556, |
|
"loss": 0.0264, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 27.33, |
|
"learning_rate": 0.0001530864197530864, |
|
"loss": 0.0202, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 28.33, |
|
"learning_rate": 0.0001506172839506173, |
|
"loss": 0.0155, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 29.33, |
|
"learning_rate": 0.00014814814814814815, |
|
"loss": 0.0137, |
|
"step": 30 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 90, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"total_flos": 2623786293657600.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|