taelor-setup / checkpoint-30 /trainer_state.json
persival-cogtive's picture
Upload folder using huggingface_hub
1884cfe
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 29.333333333333332,
"eval_steps": 500,
"global_step": 30,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.33,
"learning_rate": 2.2222222222222223e-05,
"loss": 1.4717,
"step": 1
},
{
"epoch": 1.33,
"learning_rate": 4.4444444444444447e-05,
"loss": 1.4717,
"step": 2
},
{
"epoch": 2.33,
"learning_rate": 6.666666666666667e-05,
"loss": 1.4497,
"step": 3
},
{
"epoch": 3.33,
"learning_rate": 8.888888888888889e-05,
"loss": 1.4049,
"step": 4
},
{
"epoch": 4.33,
"learning_rate": 0.00011111111111111112,
"loss": 1.3354,
"step": 5
},
{
"epoch": 5.33,
"learning_rate": 0.00013333333333333334,
"loss": 1.2413,
"step": 6
},
{
"epoch": 6.33,
"learning_rate": 0.00015555555555555556,
"loss": 1.1378,
"step": 7
},
{
"epoch": 7.33,
"learning_rate": 0.00017777777777777779,
"loss": 1.0309,
"step": 8
},
{
"epoch": 8.33,
"learning_rate": 0.0002,
"loss": 0.9263,
"step": 9
},
{
"epoch": 9.33,
"learning_rate": 0.00019753086419753085,
"loss": 0.8314,
"step": 10
},
{
"epoch": 10.33,
"learning_rate": 0.00019506172839506175,
"loss": 0.7409,
"step": 11
},
{
"epoch": 11.33,
"learning_rate": 0.0001925925925925926,
"loss": 0.6599,
"step": 12
},
{
"epoch": 12.33,
"learning_rate": 0.00019012345679012346,
"loss": 0.581,
"step": 13
},
{
"epoch": 13.33,
"learning_rate": 0.00018765432098765433,
"loss": 0.4985,
"step": 14
},
{
"epoch": 14.33,
"learning_rate": 0.0001851851851851852,
"loss": 0.4184,
"step": 15
},
{
"epoch": 15.33,
"learning_rate": 0.00018271604938271605,
"loss": 0.3421,
"step": 16
},
{
"epoch": 16.33,
"learning_rate": 0.00018024691358024692,
"loss": 0.271,
"step": 17
},
{
"epoch": 17.33,
"learning_rate": 0.00017777777777777779,
"loss": 0.2084,
"step": 18
},
{
"epoch": 18.33,
"learning_rate": 0.00017530864197530866,
"loss": 0.1555,
"step": 19
},
{
"epoch": 19.33,
"learning_rate": 0.0001728395061728395,
"loss": 0.1131,
"step": 20
},
{
"epoch": 20.33,
"learning_rate": 0.00017037037037037037,
"loss": 0.0872,
"step": 21
},
{
"epoch": 21.33,
"learning_rate": 0.00016790123456790124,
"loss": 0.0698,
"step": 22
},
{
"epoch": 22.33,
"learning_rate": 0.0001654320987654321,
"loss": 0.0578,
"step": 23
},
{
"epoch": 23.33,
"learning_rate": 0.00016296296296296295,
"loss": 0.0485,
"step": 24
},
{
"epoch": 24.33,
"learning_rate": 0.00016049382716049385,
"loss": 0.0412,
"step": 25
},
{
"epoch": 25.33,
"learning_rate": 0.0001580246913580247,
"loss": 0.0327,
"step": 26
},
{
"epoch": 26.33,
"learning_rate": 0.00015555555555555556,
"loss": 0.0264,
"step": 27
},
{
"epoch": 27.33,
"learning_rate": 0.0001530864197530864,
"loss": 0.0202,
"step": 28
},
{
"epoch": 28.33,
"learning_rate": 0.0001506172839506173,
"loss": 0.0155,
"step": 29
},
{
"epoch": 29.33,
"learning_rate": 0.00014814814814814815,
"loss": 0.0137,
"step": 30
}
],
"logging_steps": 1,
"max_steps": 90,
"num_train_epochs": 30,
"save_steps": 500,
"total_flos": 2623786293657600.0,
"trial_name": null,
"trial_params": null
}