{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.34023503077783995,
  "eval_steps": 100,
  "global_step": 38,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008953553441522105,
      "grad_norm": 0.36430689692497253,
      "learning_rate": 2e-05,
      "loss": 1.5867,
      "step": 1
    },
    {
      "epoch": 0.008953553441522105,
      "eval_loss": 1.6258726119995117,
      "eval_runtime": 56.5706,
      "eval_samples_per_second": 6.664,
      "eval_steps_per_second": 1.679,
      "step": 1
    },
    {
      "epoch": 0.01790710688304421,
      "grad_norm": 0.346722275018692,
      "learning_rate": 4e-05,
      "loss": 1.8107,
      "step": 2
    },
    {
      "epoch": 0.026860660324566313,
      "grad_norm": 0.33874014019966125,
      "learning_rate": 6e-05,
      "loss": 1.4672,
      "step": 3
    },
    {
      "epoch": 0.03581421376608842,
      "grad_norm": 0.3448643684387207,
      "learning_rate": 8e-05,
      "loss": 1.7636,
      "step": 4
    },
    {
      "epoch": 0.04476776720761052,
      "grad_norm": 0.3282066583633423,
      "learning_rate": 0.0001,
      "loss": 1.6535,
      "step": 5
    },
    {
      "epoch": 0.053721320649132626,
      "grad_norm": 0.3014908730983734,
      "learning_rate": 0.00012,
      "loss": 1.5771,
      "step": 6
    },
    {
      "epoch": 0.06267487409065473,
      "grad_norm": 0.32604771852493286,
      "learning_rate": 0.00014,
      "loss": 1.552,
      "step": 7
    },
    {
      "epoch": 0.07162842753217684,
      "grad_norm": 0.2597745954990387,
      "learning_rate": 0.00016,
      "loss": 1.4559,
      "step": 8
    },
    {
      "epoch": 0.08058198097369894,
      "grad_norm": 0.27925363183021545,
      "learning_rate": 0.00018,
      "loss": 1.3562,
      "step": 9
    },
    {
      "epoch": 0.08953553441522104,
      "grad_norm": 0.30343925952911377,
      "learning_rate": 0.0002,
      "loss": 1.5509,
      "step": 10
    },
    {
      "epoch": 0.09848908785674315,
      "grad_norm": 0.2793353796005249,
      "learning_rate": 0.00019937122098932428,
      "loss": 1.4532,
      "step": 11
    },
    {
      "epoch": 0.10744264129826525,
      "grad_norm": 0.2764800190925598,
      "learning_rate": 0.00019749279121818235,
      "loss": 1.3795,
      "step": 12
    },
    {
      "epoch": 0.11639619473978735,
      "grad_norm": 0.25808510184288025,
      "learning_rate": 0.00019438833303083678,
      "loss": 1.5303,
      "step": 13
    },
    {
      "epoch": 0.12534974818130945,
      "grad_norm": 0.2491457313299179,
      "learning_rate": 0.0001900968867902419,
      "loss": 1.485,
      "step": 14
    },
    {
      "epoch": 0.13430330162283155,
      "grad_norm": 0.22570456564426422,
      "learning_rate": 0.00018467241992282843,
      "loss": 1.4586,
      "step": 15
    },
    {
      "epoch": 0.14325685506435368,
      "grad_norm": 0.24603070318698883,
      "learning_rate": 0.000178183148246803,
      "loss": 1.4246,
      "step": 16
    },
    {
      "epoch": 0.15221040850587578,
      "grad_norm": 0.2065669447183609,
      "learning_rate": 0.00017071067811865476,
      "loss": 1.316,
      "step": 17
    },
    {
      "epoch": 0.16116396194739788,
      "grad_norm": 0.23284702003002167,
      "learning_rate": 0.00016234898018587337,
      "loss": 1.4792,
      "step": 18
    },
    {
      "epoch": 0.17011751538891998,
      "grad_norm": 0.2532147765159607,
      "learning_rate": 0.00015320320765153367,
      "loss": 1.6496,
      "step": 19
    },
    {
      "epoch": 0.17907106883044208,
      "grad_norm": 0.3514811098575592,
      "learning_rate": 0.00014338837391175582,
      "loss": 1.3896,
      "step": 20
    },
    {
      "epoch": 0.18802462227196418,
      "grad_norm": 0.23653051257133484,
      "learning_rate": 0.00013302790619551674,
      "loss": 1.4108,
      "step": 21
    },
    {
      "epoch": 0.1969781757134863,
      "grad_norm": 0.23037055134773254,
      "learning_rate": 0.00012225209339563145,
      "loss": 1.4471,
      "step": 22
    },
    {
      "epoch": 0.2059317291550084,
      "grad_norm": 0.22145314514636993,
      "learning_rate": 0.00011119644761033078,
      "loss": 1.4897,
      "step": 23
    },
    {
      "epoch": 0.2148852825965305,
      "grad_norm": 0.20780210196971893,
      "learning_rate": 0.0001,
      "loss": 1.2546,
      "step": 24
    },
    {
      "epoch": 0.2238388360380526,
      "grad_norm": 0.21545733511447906,
      "learning_rate": 8.880355238966923e-05,
      "loss": 1.3577,
      "step": 25
    },
    {
      "epoch": 0.2327923894795747,
      "grad_norm": 0.20867308974266052,
      "learning_rate": 7.774790660436858e-05,
      "loss": 1.4935,
      "step": 26
    },
    {
      "epoch": 0.2417459429210968,
      "grad_norm": 0.20432667434215546,
      "learning_rate": 6.697209380448333e-05,
      "loss": 1.2985,
      "step": 27
    },
    {
      "epoch": 0.2506994963626189,
      "grad_norm": 0.20409992337226868,
      "learning_rate": 5.6611626088244194e-05,
      "loss": 1.3887,
      "step": 28
    },
    {
      "epoch": 0.259653049804141,
      "grad_norm": 0.19983279705047607,
      "learning_rate": 4.6796792348466356e-05,
      "loss": 1.2571,
      "step": 29
    },
    {
      "epoch": 0.2686066032456631,
      "grad_norm": 0.20466941595077515,
      "learning_rate": 3.7651019814126654e-05,
      "loss": 1.4783,
      "step": 30
    },
    {
      "epoch": 0.2775601566871852,
      "grad_norm": 0.1904105693101883,
      "learning_rate": 2.9289321881345254e-05,
      "loss": 1.2726,
      "step": 31
    },
    {
      "epoch": 0.28651371012870736,
      "grad_norm": 0.19994252920150757,
      "learning_rate": 2.181685175319702e-05,
      "loss": 1.3431,
      "step": 32
    },
    {
      "epoch": 0.29546726357022945,
      "grad_norm": 0.20978009700775146,
      "learning_rate": 1.5327580077171587e-05,
      "loss": 1.5459,
      "step": 33
    },
    {
      "epoch": 0.30442081701175155,
      "grad_norm": 0.20526275038719177,
      "learning_rate": 9.903113209758096e-06,
      "loss": 1.1755,
      "step": 34
    },
    {
      "epoch": 0.31337437045327365,
      "grad_norm": 0.20616187155246735,
      "learning_rate": 5.611666969163243e-06,
      "loss": 1.6515,
      "step": 35
    },
    {
      "epoch": 0.32232792389479575,
      "grad_norm": 0.21263156831264496,
      "learning_rate": 2.5072087818176382e-06,
      "loss": 1.3846,
      "step": 36
    },
    {
      "epoch": 0.33128147733631785,
      "grad_norm": 0.19717688858509064,
      "learning_rate": 6.287790106757396e-07,
      "loss": 1.3304,
      "step": 37
    },
    {
      "epoch": 0.34023503077783995,
      "grad_norm": 0.2078697234392166,
      "learning_rate": 0.0,
      "loss": 1.4625,
      "step": 38
    }
  ],
  "logging_steps": 1,
  "max_steps": 38,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 2,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.3183977611264e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}