{ "best_metric": 7.529487609863281, "best_model_checkpoint": "./results/checkpoint-916", "epoch": 1.0, "eval_steps": 500, "global_step": 916, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1091703056768559, "grad_norm": 25.480337142944336, "learning_rate": 2.959061135371179e-05, "loss": 56.923, "step": 100 }, { "epoch": 0.2183406113537118, "grad_norm": 39.87223815917969, "learning_rate": 2.918122270742358e-05, "loss": 46.6475, "step": 200 }, { "epoch": 0.32751091703056767, "grad_norm": 48.05048751831055, "learning_rate": 2.877183406113537e-05, "loss": 33.6867, "step": 300 }, { "epoch": 0.4366812227074236, "grad_norm": 31.941883087158203, "learning_rate": 2.8362445414847164e-05, "loss": 21.1084, "step": 400 }, { "epoch": 0.5458515283842795, "grad_norm": 55.025856018066406, "learning_rate": 2.7953056768558954e-05, "loss": 12.9495, "step": 500 }, { "epoch": 0.6550218340611353, "grad_norm": 34.957523345947266, "learning_rate": 2.7543668122270742e-05, "loss": 10.0745, "step": 600 }, { "epoch": 0.7641921397379913, "grad_norm": 24.020906448364258, "learning_rate": 2.7134279475982533e-05, "loss": 8.3541, "step": 700 }, { "epoch": 0.8733624454148472, "grad_norm": 32.709571838378906, "learning_rate": 2.6724890829694323e-05, "loss": 7.5128, "step": 800 }, { "epoch": 0.982532751091703, "grad_norm": 38.94672393798828, "learning_rate": 2.6315502183406114e-05, "loss": 7.2241, "step": 900 }, { "epoch": 1.0, "eval_avg_mae": 7.529487609863281, "eval_loss": 7.529487609863281, "eval_mae_lex": 6.992014408111572, "eval_mae_sem": 5.432034492492676, "eval_mae_syn": 10.164413452148438, "eval_runtime": 27.1764, "eval_samples_per_second": 269.609, "eval_steps_per_second": 8.426, "step": 916 } ], "logging_steps": 100, "max_steps": 7328, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1927766233338624.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }