{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9767441860465116, "eval_steps": 500, "global_step": 192, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15503875968992248, "grad_norm": 0.7852384448051453, "learning_rate": 1.75e-05, "loss": 2.506, "step": 10 }, { "epoch": 0.31007751937984496, "grad_norm": 0.5620294809341431, "learning_rate": 4.25e-05, "loss": 1.9499, "step": 20 }, { "epoch": 0.46511627906976744, "grad_norm": 0.818453848361969, "learning_rate": 4.9795940299380575e-05, "loss": 1.8121, "step": 30 }, { "epoch": 0.6201550387596899, "grad_norm": 0.5289535522460938, "learning_rate": 4.880447529310118e-05, "loss": 1.789, "step": 40 }, { "epoch": 0.7751937984496124, "grad_norm": 0.8725740313529968, "learning_rate": 4.723355372206297e-05, "loss": 1.7512, "step": 50 }, { "epoch": 0.9302325581395349, "grad_norm": 0.7308263182640076, "learning_rate": 4.478741221073136e-05, "loss": 1.6186, "step": 60 }, { "epoch": 1.0852713178294573, "grad_norm": 0.7901429533958435, "learning_rate": 4.1682970280555986e-05, "loss": 1.6679, "step": 70 }, { "epoch": 1.2403100775193798, "grad_norm": 0.4563349783420563, "learning_rate": 3.8023508512198256e-05, "loss": 1.266, "step": 80 }, { "epoch": 1.3953488372093024, "grad_norm": 0.5261668562889099, "learning_rate": 3.393077224502832e-05, "loss": 1.3869, "step": 90 }, { "epoch": 1.550387596899225, "grad_norm": 0.8815342783927917, "learning_rate": 2.954092127448591e-05, "loss": 1.2376, "step": 100 }, { "epoch": 1.7054263565891472, "grad_norm": 0.727622926235199, "learning_rate": 2.5e-05, "loss": 1.291, "step": 110 }, { "epoch": 1.8604651162790697, "grad_norm": 0.6443896293640137, "learning_rate": 2.0459078725514092e-05, "loss": 1.3153, "step": 120 }, { "epoch": 2.0155038759689923, "grad_norm": 0.7863492965698242, "learning_rate": 1.6069227754971683e-05, "loss": 1.3498, "step": 130 }, { "epoch": 2.1705426356589146, "grad_norm": 0.9250425696372986, "learning_rate": 1.1976491487801748e-05, "loss": 0.9415, "step": 140 }, { "epoch": 2.3255813953488373, "grad_norm": 0.9809682965278625, "learning_rate": 8.317029719444016e-06, "loss": 0.7475, "step": 150 }, { "epoch": 2.4806201550387597, "grad_norm": 0.8305571675300598, "learning_rate": 5.2125877892686496e-06, "loss": 0.6603, "step": 160 }, { "epoch": 2.6356589147286824, "grad_norm": 0.6964967846870422, "learning_rate": 2.7664462779370293e-06, "loss": 0.6403, "step": 170 }, { "epoch": 2.7906976744186047, "grad_norm": 1.0790668725967407, "learning_rate": 1.0599850022898539e-06, "loss": 0.8335, "step": 180 }, { "epoch": 2.945736434108527, "grad_norm": 0.8265672326087952, "learning_rate": 1.4997561900135238e-07, "loss": 0.6972, "step": 190 }, { "epoch": 2.9767441860465116, "step": 192, "total_flos": 1.1519499230222746e+17, "train_loss": 1.3342496789991856, "train_runtime": 2508.9029, "train_samples_per_second": 0.617, "train_steps_per_second": 0.077 } ], "logging_steps": 10, "max_steps": 192, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1519499230222746e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }