{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.34023503077783995, "eval_steps": 100, "global_step": 38, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008953553441522105, "grad_norm": 0.36430689692497253, "learning_rate": 2e-05, "loss": 1.5867, "step": 1 }, { "epoch": 0.008953553441522105, "eval_loss": 1.6258726119995117, "eval_runtime": 56.5706, "eval_samples_per_second": 6.664, "eval_steps_per_second": 1.679, "step": 1 }, { "epoch": 0.01790710688304421, "grad_norm": 0.346722275018692, "learning_rate": 4e-05, "loss": 1.8107, "step": 2 }, { "epoch": 0.026860660324566313, "grad_norm": 0.33874014019966125, "learning_rate": 6e-05, "loss": 1.4672, "step": 3 }, { "epoch": 0.03581421376608842, "grad_norm": 0.3448643684387207, "learning_rate": 8e-05, "loss": 1.7636, "step": 4 }, { "epoch": 0.04476776720761052, "grad_norm": 0.3282066583633423, "learning_rate": 0.0001, "loss": 1.6535, "step": 5 }, { "epoch": 0.053721320649132626, "grad_norm": 0.3014908730983734, "learning_rate": 0.00012, "loss": 1.5771, "step": 6 }, { "epoch": 0.06267487409065473, "grad_norm": 0.32604771852493286, "learning_rate": 0.00014, "loss": 1.552, "step": 7 }, { "epoch": 0.07162842753217684, "grad_norm": 0.2597745954990387, "learning_rate": 0.00016, "loss": 1.4559, "step": 8 }, { "epoch": 0.08058198097369894, "grad_norm": 0.27925363183021545, "learning_rate": 0.00018, "loss": 1.3562, "step": 9 }, { "epoch": 0.08953553441522104, "grad_norm": 0.30343925952911377, "learning_rate": 0.0002, "loss": 1.5509, "step": 10 }, { "epoch": 0.09848908785674315, "grad_norm": 0.2793353796005249, "learning_rate": 0.00019937122098932428, "loss": 1.4532, "step": 11 }, { "epoch": 0.10744264129826525, "grad_norm": 0.2764800190925598, "learning_rate": 0.00019749279121818235, "loss": 1.3795, "step": 12 }, { "epoch": 0.11639619473978735, "grad_norm": 0.25808510184288025, "learning_rate": 0.00019438833303083678, "loss": 1.5303, "step": 13 }, { "epoch": 0.12534974818130945, "grad_norm": 0.2491457313299179, "learning_rate": 0.0001900968867902419, "loss": 1.485, "step": 14 }, { "epoch": 0.13430330162283155, "grad_norm": 0.22570456564426422, "learning_rate": 0.00018467241992282843, "loss": 1.4586, "step": 15 }, { "epoch": 0.14325685506435368, "grad_norm": 0.24603070318698883, "learning_rate": 0.000178183148246803, "loss": 1.4246, "step": 16 }, { "epoch": 0.15221040850587578, "grad_norm": 0.2065669447183609, "learning_rate": 0.00017071067811865476, "loss": 1.316, "step": 17 }, { "epoch": 0.16116396194739788, "grad_norm": 0.23284702003002167, "learning_rate": 0.00016234898018587337, "loss": 1.4792, "step": 18 }, { "epoch": 0.17011751538891998, "grad_norm": 0.2532147765159607, "learning_rate": 0.00015320320765153367, "loss": 1.6496, "step": 19 }, { "epoch": 0.17907106883044208, "grad_norm": 0.3514811098575592, "learning_rate": 0.00014338837391175582, "loss": 1.3896, "step": 20 }, { "epoch": 0.18802462227196418, "grad_norm": 0.23653051257133484, "learning_rate": 0.00013302790619551674, "loss": 1.4108, "step": 21 }, { "epoch": 0.1969781757134863, "grad_norm": 0.23037055134773254, "learning_rate": 0.00012225209339563145, "loss": 1.4471, "step": 22 }, { "epoch": 0.2059317291550084, "grad_norm": 0.22145314514636993, "learning_rate": 0.00011119644761033078, "loss": 1.4897, "step": 23 }, { "epoch": 0.2148852825965305, "grad_norm": 0.20780210196971893, "learning_rate": 0.0001, "loss": 1.2546, "step": 24 }, { "epoch": 0.2238388360380526, "grad_norm": 0.21545733511447906, "learning_rate": 8.880355238966923e-05, "loss": 1.3577, "step": 25 }, { "epoch": 0.2327923894795747, "grad_norm": 0.20867308974266052, "learning_rate": 7.774790660436858e-05, "loss": 1.4935, "step": 26 }, { "epoch": 0.2417459429210968, "grad_norm": 0.20432667434215546, "learning_rate": 6.697209380448333e-05, "loss": 1.2985, "step": 27 }, { "epoch": 0.2506994963626189, "grad_norm": 0.20409992337226868, "learning_rate": 5.6611626088244194e-05, "loss": 1.3887, "step": 28 }, { "epoch": 0.259653049804141, "grad_norm": 0.19983279705047607, "learning_rate": 4.6796792348466356e-05, "loss": 1.2571, "step": 29 }, { "epoch": 0.2686066032456631, "grad_norm": 0.20466941595077515, "learning_rate": 3.7651019814126654e-05, "loss": 1.4783, "step": 30 }, { "epoch": 0.2775601566871852, "grad_norm": 0.1904105693101883, "learning_rate": 2.9289321881345254e-05, "loss": 1.2726, "step": 31 }, { "epoch": 0.28651371012870736, "grad_norm": 0.19994252920150757, "learning_rate": 2.181685175319702e-05, "loss": 1.3431, "step": 32 }, { "epoch": 0.29546726357022945, "grad_norm": 0.20978009700775146, "learning_rate": 1.5327580077171587e-05, "loss": 1.5459, "step": 33 }, { "epoch": 0.30442081701175155, "grad_norm": 0.20526275038719177, "learning_rate": 9.903113209758096e-06, "loss": 1.1755, "step": 34 }, { "epoch": 0.31337437045327365, "grad_norm": 0.20616187155246735, "learning_rate": 5.611666969163243e-06, "loss": 1.6515, "step": 35 }, { "epoch": 0.32232792389479575, "grad_norm": 0.21263156831264496, "learning_rate": 2.5072087818176382e-06, "loss": 1.3846, "step": 36 }, { "epoch": 0.33128147733631785, "grad_norm": 0.19717688858509064, "learning_rate": 6.287790106757396e-07, "loss": 1.3304, "step": 37 }, { "epoch": 0.34023503077783995, "grad_norm": 0.2078697234392166, "learning_rate": 0.0, "loss": 1.4625, "step": 38 } ], "logging_steps": 1, "max_steps": 38, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.3183977611264e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }