{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 31150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 9.94649545211343e-06, "loss": 4.4121, "step": 500 }, { "epoch": 0.32, "learning_rate": 9.892990904226861e-06, "loss": 0.1731, "step": 1000 }, { "epoch": 0.48, "learning_rate": 9.83948635634029e-06, "loss": 0.1161, "step": 1500 }, { "epoch": 0.64, "learning_rate": 9.78598180845372e-06, "loss": 0.1059, "step": 2000 }, { "epoch": 0.8, "learning_rate": 9.73247726056715e-06, "loss": 0.1011, "step": 2500 }, { "epoch": 0.96, "learning_rate": 9.67897271268058e-06, "loss": 0.1013, "step": 3000 }, { "epoch": 1.0, "eval_loss": 0.08340005576610565, "eval_runtime": 17.1185, "eval_samples_per_second": 29.208, "eval_steps_per_second": 7.302, "step": 3115 }, { "epoch": 1.12, "learning_rate": 9.62546816479401e-06, "loss": 0.0987, "step": 3500 }, { "epoch": 1.28, "learning_rate": 9.571963616907438e-06, "loss": 0.0946, "step": 4000 }, { "epoch": 1.44, "learning_rate": 9.518459069020868e-06, "loss": 0.0956, "step": 4500 }, { "epoch": 1.61, "learning_rate": 9.464954521134298e-06, "loss": 0.0984, "step": 5000 }, { "epoch": 1.77, "learning_rate": 9.411449973247728e-06, "loss": 0.0949, "step": 5500 }, { "epoch": 1.93, "learning_rate": 9.357945425361158e-06, "loss": 0.0929, "step": 6000 }, { "epoch": 2.0, "eval_loss": 0.08016223460435867, "eval_runtime": 17.1151, "eval_samples_per_second": 29.214, "eval_steps_per_second": 7.304, "step": 6230 }, { "epoch": 2.09, "learning_rate": 9.304440877474586e-06, "loss": 0.0909, "step": 6500 }, { "epoch": 2.25, "learning_rate": 9.250936329588016e-06, "loss": 0.0917, "step": 7000 }, { "epoch": 2.41, "learning_rate": 9.197431781701446e-06, "loss": 0.0913, "step": 7500 }, { "epoch": 2.57, "learning_rate": 9.143927233814876e-06, "loss": 0.0912, "step": 8000 }, { "epoch": 2.73, "learning_rate": 9.090422685928304e-06, "loss": 0.0904, "step": 8500 }, { "epoch": 2.89, "learning_rate": 9.036918138041734e-06, "loss": 0.0908, "step": 9000 }, { "epoch": 3.0, "eval_loss": 0.0781576856970787, "eval_runtime": 17.0736, "eval_samples_per_second": 29.285, "eval_steps_per_second": 7.321, "step": 9345 }, { "epoch": 3.05, "learning_rate": 8.983413590155164e-06, "loss": 0.089, "step": 9500 }, { "epoch": 3.21, "learning_rate": 8.929909042268593e-06, "loss": 0.0874, "step": 10000 }, { "epoch": 3.37, "learning_rate": 8.876404494382023e-06, "loss": 0.0868, "step": 10500 }, { "epoch": 3.53, "learning_rate": 8.822899946495453e-06, "loss": 0.0868, "step": 11000 }, { "epoch": 3.69, "learning_rate": 8.769395398608883e-06, "loss": 0.0877, "step": 11500 }, { "epoch": 3.85, "learning_rate": 8.715890850722311e-06, "loss": 0.0876, "step": 12000 }, { "epoch": 4.0, "eval_loss": 0.07677410542964935, "eval_runtime": 17.0783, "eval_samples_per_second": 29.277, "eval_steps_per_second": 7.319, "step": 12460 }, { "epoch": 4.01, "learning_rate": 8.662386302835741e-06, "loss": 0.0887, "step": 12500 }, { "epoch": 4.17, "learning_rate": 8.608881754949171e-06, "loss": 0.0848, "step": 13000 }, { "epoch": 4.33, "learning_rate": 8.555377207062601e-06, "loss": 0.0825, "step": 13500 }, { "epoch": 4.49, "learning_rate": 8.501872659176031e-06, "loss": 0.085, "step": 14000 }, { "epoch": 4.65, "learning_rate": 8.44836811128946e-06, "loss": 0.0858, "step": 14500 }, { "epoch": 4.82, "learning_rate": 8.39486356340289e-06, "loss": 0.0855, "step": 15000 }, { "epoch": 4.98, "learning_rate": 8.34135901551632e-06, "loss": 0.0857, "step": 15500 }, { "epoch": 5.0, "eval_loss": 0.07618943601846695, "eval_runtime": 17.0787, "eval_samples_per_second": 29.276, "eval_steps_per_second": 7.319, "step": 15575 }, { "epoch": 5.14, "learning_rate": 8.28785446762975e-06, "loss": 0.0833, "step": 16000 }, { "epoch": 5.3, "learning_rate": 8.234349919743178e-06, "loss": 0.0819, "step": 16500 }, { "epoch": 5.46, "learning_rate": 8.180845371856608e-06, "loss": 0.0842, "step": 17000 }, { "epoch": 5.62, "learning_rate": 8.127340823970038e-06, "loss": 0.0814, "step": 17500 }, { "epoch": 5.78, "learning_rate": 8.073836276083468e-06, "loss": 0.0828, "step": 18000 }, { "epoch": 5.94, "learning_rate": 8.020331728196898e-06, "loss": 0.0847, "step": 18500 }, { "epoch": 6.0, "eval_loss": 0.07566038519144058, "eval_runtime": 17.0768, "eval_samples_per_second": 29.28, "eval_steps_per_second": 7.32, "step": 18690 }, { "epoch": 6.1, "learning_rate": 7.966827180310326e-06, "loss": 0.0826, "step": 19000 }, { "epoch": 6.26, "learning_rate": 7.913322632423756e-06, "loss": 0.0791, "step": 19500 }, { "epoch": 6.42, "learning_rate": 7.859818084537186e-06, "loss": 0.0809, "step": 20000 }, { "epoch": 6.58, "learning_rate": 7.806313536650616e-06, "loss": 0.083, "step": 20500 }, { "epoch": 6.74, "learning_rate": 7.752808988764046e-06, "loss": 0.0817, "step": 21000 }, { "epoch": 6.9, "learning_rate": 7.699304440877475e-06, "loss": 0.0815, "step": 21500 }, { "epoch": 7.0, "eval_loss": 0.07512963563203812, "eval_runtime": 17.0959, "eval_samples_per_second": 29.247, "eval_steps_per_second": 7.312, "step": 21805 }, { "epoch": 7.06, "learning_rate": 7.645799892990905e-06, "loss": 0.0812, "step": 22000 }, { "epoch": 7.22, "learning_rate": 7.592295345104335e-06, "loss": 0.0805, "step": 22500 }, { "epoch": 7.38, "learning_rate": 7.538790797217765e-06, "loss": 0.0808, "step": 23000 }, { "epoch": 7.54, "learning_rate": 7.485286249331194e-06, "loss": 0.0792, "step": 23500 }, { "epoch": 7.7, "learning_rate": 7.431781701444624e-06, "loss": 0.0792, "step": 24000 }, { "epoch": 7.87, "learning_rate": 7.378277153558053e-06, "loss": 0.0807, "step": 24500 }, { "epoch": 8.0, "eval_loss": 0.07496295124292374, "eval_runtime": 17.08, "eval_samples_per_second": 29.274, "eval_steps_per_second": 7.319, "step": 24920 }, { "epoch": 8.03, "learning_rate": 7.324772605671483e-06, "loss": 0.0784, "step": 25000 }, { "epoch": 8.19, "learning_rate": 7.271268057784913e-06, "loss": 0.0776, "step": 25500 }, { "epoch": 8.35, "learning_rate": 7.217763509898342e-06, "loss": 0.0764, "step": 26000 }, { "epoch": 8.51, "learning_rate": 7.164258962011772e-06, "loss": 0.0792, "step": 26500 }, { "epoch": 8.67, "learning_rate": 7.110754414125201e-06, "loss": 0.0802, "step": 27000 }, { "epoch": 8.83, "learning_rate": 7.057249866238631e-06, "loss": 0.0803, "step": 27500 }, { "epoch": 8.99, "learning_rate": 7.003745318352061e-06, "loss": 0.0779, "step": 28000 }, { "epoch": 9.0, "eval_loss": 0.0747738629579544, "eval_runtime": 17.0877, "eval_samples_per_second": 29.261, "eval_steps_per_second": 7.315, "step": 28035 }, { "epoch": 9.15, "learning_rate": 6.950240770465491e-06, "loss": 0.0765, "step": 28500 }, { "epoch": 9.31, "learning_rate": 6.896736222578921e-06, "loss": 0.0762, "step": 29000 }, { "epoch": 9.47, "learning_rate": 6.84323167469235e-06, "loss": 0.0777, "step": 29500 }, { "epoch": 9.63, "learning_rate": 6.789727126805778e-06, "loss": 0.078, "step": 30000 }, { "epoch": 9.79, "learning_rate": 6.736222578919208e-06, "loss": 0.0782, "step": 30500 }, { "epoch": 9.95, "learning_rate": 6.682718031032638e-06, "loss": 0.0757, "step": 31000 }, { "epoch": 10.0, "eval_loss": 0.07480964064598083, "eval_runtime": 17.0749, "eval_samples_per_second": 29.283, "eval_steps_per_second": 7.321, "step": 31150 } ], "max_steps": 93450, "num_train_epochs": 30, "total_flos": 8.53207661150208e+16, "trial_name": null, "trial_params": null }