{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 4413, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.022660321776569226, "grad_norm": 0.09582193195819855, "learning_rate": 6.666666666666667e-06, "loss": 2.4053, "step": 100 }, { "epoch": 0.04532064355313845, "grad_norm": 0.1730571836233139, "learning_rate": 1.3333333333333333e-05, "loss": 2.3825, "step": 200 }, { "epoch": 0.06798096532970768, "grad_norm": 0.3355884253978729, "learning_rate": 2e-05, "loss": 2.356, "step": 300 }, { "epoch": 0.0906412871062769, "grad_norm": 0.37008875608444214, "learning_rate": 1.9970843111690533e-05, "loss": 2.2935, "step": 400 }, { "epoch": 0.11330160888284614, "grad_norm": 0.5142782330513, "learning_rate": 1.9883542471589315e-05, "loss": 2.2561, "step": 500 }, { "epoch": 0.13596193065941536, "grad_norm": 0.42936116456985474, "learning_rate": 1.9738607162698895e-05, "loss": 2.2424, "step": 600 }, { "epoch": 0.1586222524359846, "grad_norm": 0.537521243095398, "learning_rate": 1.9536882357541958e-05, "loss": 2.242, "step": 700 }, { "epoch": 0.1812825742125538, "grad_norm": 0.602051854133606, "learning_rate": 1.927954438964115e-05, "loss": 2.214, "step": 800 }, { "epoch": 0.20394289598912305, "grad_norm": 0.5285528898239136, "learning_rate": 1.8968093893874042e-05, "loss": 2.1943, "step": 900 }, { "epoch": 0.22660321776569228, "grad_norm": 0.6450159549713135, "learning_rate": 1.8604347055704433e-05, "loss": 2.1566, "step": 1000 }, { "epoch": 0.2492635395422615, "grad_norm": 0.6594407558441162, "learning_rate": 1.8190425020319016e-05, "loss": 2.1578, "step": 1100 }, { "epoch": 0.27192386131883073, "grad_norm": 0.6740846633911133, "learning_rate": 1.7728741523428696e-05, "loss": 2.1578, "step": 1200 }, { "epoch": 0.29458418309539997, "grad_norm": 0.6054636240005493, "learning_rate": 1.722198881586411e-05, "loss": 2.1301, "step": 1300 }, { "epoch": 0.3172445048719692, "grad_norm": 0.5829110145568848, "learning_rate": 1.667312196404425e-05, "loss": 2.1366, "step": 1400 }, { "epoch": 0.3399048266485384, "grad_norm": 0.6636696457862854, "learning_rate": 1.6085341617868172e-05, "loss": 2.1301, "step": 1500 }, { "epoch": 0.3625651484251076, "grad_norm": 0.8352382779121399, "learning_rate": 1.546207534651667e-05, "loss": 2.1157, "step": 1600 }, { "epoch": 0.38522547020167686, "grad_norm": 0.6855395436286926, "learning_rate": 1.4806957651001911e-05, "loss": 2.1084, "step": 1700 }, { "epoch": 0.4078857919782461, "grad_norm": 0.8896074891090393, "learning_rate": 1.4123808770019433e-05, "loss": 2.1494, "step": 1800 }, { "epoch": 0.43054611375481533, "grad_norm": 0.7051901817321777, "learning_rate": 1.3416612402693543e-05, "loss": 2.1406, "step": 1900 }, { "epoch": 0.45320643553138457, "grad_norm": 0.9859122633934021, "learning_rate": 1.2689492478123242e-05, "loss": 2.1142, "step": 2000 }, { "epoch": 0.47586675730795375, "grad_norm": 0.9501364827156067, "learning_rate": 1.1946689107194183e-05, "loss": 2.091, "step": 2100 }, { "epoch": 0.498527079084523, "grad_norm": 1.0703001022338867, "learning_rate": 1.119253385689078e-05, "loss": 2.0765, "step": 2200 }, { "epoch": 0.5211874008610923, "grad_norm": 0.669400691986084, "learning_rate": 1.0431424491293254e-05, "loss": 2.0824, "step": 2300 }, { "epoch": 0.5438477226376615, "grad_norm": 0.7835758924484253, "learning_rate": 9.667799326554403e-06, "loss": 2.0818, "step": 2400 }, { "epoch": 0.5665080444142306, "grad_norm": 0.8207575082778931, "learning_rate": 8.906111349401949e-06, "loss": 2.1016, "step": 2500 }, { "epoch": 0.5891683661907999, "grad_norm": 0.8124341368675232, "learning_rate": 8.150802250091193e-06, "loss": 2.0647, "step": 2600 }, { "epoch": 0.6118286879673691, "grad_norm": 0.8744191527366638, "learning_rate": 7.406276521231679e-06, "loss": 2.0657, "step": 2700 }, { "epoch": 0.6344890097439384, "grad_norm": 1.0869206190109253, "learning_rate": 6.676875773527383e-06, "loss": 2.0547, "step": 2800 }, { "epoch": 0.6571493315205076, "grad_norm": 0.7237268686294556, "learning_rate": 5.966853418205035e-06, "loss": 2.124, "step": 2900 }, { "epoch": 0.6798096532970768, "grad_norm": 0.9836551547050476, "learning_rate": 5.2803498637669055e-06, "loss": 2.0877, "step": 3000 }, { "epoch": 0.7024699750736461, "grad_norm": 0.8831650614738464, "learning_rate": 4.621368371705162e-06, "loss": 2.0978, "step": 3100 }, { "epoch": 0.7251302968502152, "grad_norm": 0.8482229709625244, "learning_rate": 3.993751711972204e-06, "loss": 2.075, "step": 3200 }, { "epoch": 0.7477906186267845, "grad_norm": 0.8325951099395752, "learning_rate": 3.401159754337836e-06, "loss": 2.1016, "step": 3300 }, { "epoch": 0.7704509404033537, "grad_norm": 1.3220783472061157, "learning_rate": 2.8470481263064255e-06, "loss": 2.1096, "step": 3400 }, { "epoch": 0.793111262179923, "grad_norm": 0.8809642195701599, "learning_rate": 2.3346480620478685e-06, "loss": 2.079, "step": 3500 }, { "epoch": 0.8157715839564922, "grad_norm": 0.9344497919082642, "learning_rate": 1.866947559850839e-06, "loss": 2.1025, "step": 3600 }, { "epoch": 0.8384319057330614, "grad_norm": 0.9643566012382507, "learning_rate": 1.446673957976298e-06, "loss": 2.1116, "step": 3700 }, { "epoch": 0.8610922275096307, "grad_norm": 1.0109236240386963, "learning_rate": 1.0762780305181064e-06, "loss": 2.0662, "step": 3800 }, { "epoch": 0.8837525492861998, "grad_norm": 0.9456806182861328, "learning_rate": 7.579196960136958e-07, "loss": 2.0815, "step": 3900 }, { "epoch": 0.9064128710627691, "grad_norm": 1.229778528213501, "learning_rate": 4.934554221433741e-07, "loss": 2.0636, "step": 4000 }, { "epoch": 0.9290731928393383, "grad_norm": 0.8360131978988647, "learning_rate": 2.8442739996615956e-07, "loss": 2.0465, "step": 4100 }, { "epoch": 0.9517335146159075, "grad_norm": 0.6662079691886902, "learning_rate": 1.3205455082128228e-07, "loss": 2.0419, "step": 4200 }, { "epoch": 0.9743938363924768, "grad_norm": 0.8761087656021118, "learning_rate": 3.7225418337528685e-08, "loss": 2.0664, "step": 4300 }, { "epoch": 0.997054158169046, "grad_norm": 1.0942589044570923, "learning_rate": 4.929869997571945e-10, "loss": 2.0249, "step": 4400 }, { "epoch": 1.0, "step": 4413, "total_flos": 8.01984399409152e+16, "train_loss": 2.1353629073560736, "train_runtime": 1379.8897, "train_samples_per_second": 6.396, "train_steps_per_second": 3.198 } ], "logging_steps": 100, "max_steps": 4413, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.01984399409152e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }