{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.74074074074074, "eval_steps": 500, "global_step": 1480, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 52.495697021484375, "learning_rate": 9.9464649786582e-06, "loss": 21.8993, "step": 75 }, { "epoch": 1.0, "eval_classes": 0, "eval_loss": 12.443974494934082, "eval_map": 0.6744, "eval_map_50": 0.9348, "eval_map_75": 0.7871, "eval_map_large": 0.9595, "eval_map_medium": 0.7115, "eval_map_per_class": 0.6744, "eval_map_small": 0.4962, "eval_mar_1": 0.0394, "eval_mar_10": 0.3779, "eval_mar_100": 0.7627, "eval_mar_100_per_class": 0.7627, "eval_mar_large": 0.9741, "eval_mar_medium": 0.7977, "eval_mar_small": 0.5896, "eval_runtime": 301.7158, "eval_samples_per_second": 0.437, "eval_steps_per_second": 0.056, "step": 75 }, { "epoch": 2.0, "grad_norm": 35.35026550292969, "learning_rate": 9.768230005822394e-06, "loss": 10.836, "step": 150 }, { "epoch": 2.0, "eval_classes": 0, "eval_loss": 10.104583740234375, "eval_map": 0.7114, "eval_map_50": 0.9553, "eval_map_75": 0.8305, "eval_map_large": 0.9806, "eval_map_medium": 0.7495, "eval_map_per_class": 0.7114, "eval_map_small": 0.5421, "eval_mar_1": 0.0399, "eval_mar_10": 0.3853, "eval_mar_100": 0.7924, "eval_mar_100_per_class": 0.7924, "eval_mar_large": 0.9864, "eval_mar_medium": 0.8267, "eval_mar_small": 0.6239, "eval_runtime": 301.7621, "eval_samples_per_second": 0.437, "eval_steps_per_second": 0.056, "step": 150 }, { "epoch": 3.0, "grad_norm": 77.25425720214844, "learning_rate": 9.469397461053838e-06, "loss": 9.422, "step": 225 }, { "epoch": 3.0, "eval_classes": 0, "eval_loss": 9.384869575500488, "eval_map": 0.7277, "eval_map_50": 0.9599, "eval_map_75": 0.8501, "eval_map_large": 0.9829, "eval_map_medium": 0.7674, "eval_map_per_class": 0.7277, "eval_map_small": 0.5591, "eval_mar_1": 0.0398, "eval_mar_10": 0.3882, "eval_mar_100": 0.8057, "eval_mar_100_per_class": 0.8057, "eval_mar_large": 0.9852, "eval_mar_medium": 0.8385, "eval_mar_small": 0.6454, "eval_runtime": 301.8767, "eval_samples_per_second": 0.437, "eval_steps_per_second": 0.056, "step": 225 }, { "epoch": 4.0, "grad_norm": 25.18535614013672, "learning_rate": 9.057525385652877e-06, "loss": 8.7626, "step": 300 }, { "epoch": 4.0, "eval_classes": 0, "eval_loss": 8.737225532531738, "eval_map": 0.7337, "eval_map_50": 0.9632, "eval_map_75": 0.8564, "eval_map_large": 0.9825, "eval_map_medium": 0.7713, "eval_map_per_class": 0.7337, "eval_map_small": 0.5685, "eval_mar_1": 0.0401, "eval_mar_10": 0.3902, "eval_mar_100": 0.8122, "eval_mar_100_per_class": 0.8122, "eval_mar_large": 0.9889, "eval_mar_medium": 0.8441, "eval_mar_small": 0.6565, "eval_runtime": 301.9674, "eval_samples_per_second": 0.437, "eval_steps_per_second": 0.056, "step": 300 }, { "epoch": 5.0, "grad_norm": 100.30033111572266, "learning_rate": 8.550511739408428e-06, "loss": 8.2403, "step": 375 }, { "epoch": 5.0, "eval_classes": 0, "eval_loss": 8.39384651184082, "eval_map": 0.7416, "eval_map_50": 0.9673, "eval_map_75": 0.8706, "eval_map_large": 0.991, "eval_map_medium": 0.7812, "eval_map_per_class": 0.7416, "eval_map_small": 0.5659, "eval_mar_1": 0.0403, "eval_mar_10": 0.3914, "eval_mar_100": 0.8177, "eval_mar_100_per_class": 0.8177, "eval_mar_large": 0.9926, "eval_mar_medium": 0.8508, "eval_mar_small": 0.657, "eval_runtime": 301.5787, "eval_samples_per_second": 0.438, "eval_steps_per_second": 0.056, "step": 375 }, { "epoch": 6.0, "grad_norm": 60.029884338378906, "learning_rate": 7.947506126566009e-06, "loss": 7.8836, "step": 450 }, { "epoch": 6.0, "eval_classes": 0, "eval_loss": 7.927948951721191, "eval_map": 0.7504, "eval_map_50": 0.9692, "eval_map_75": 0.8794, "eval_map_large": 0.9901, "eval_map_medium": 0.7878, "eval_map_per_class": 0.7504, "eval_map_small": 0.5853, "eval_mar_1": 0.0412, "eval_mar_10": 0.3964, "eval_mar_100": 0.824, "eval_mar_100_per_class": 0.824, "eval_mar_large": 0.9914, "eval_mar_medium": 0.8561, "eval_mar_small": 0.6685, "eval_runtime": 302.1538, "eval_samples_per_second": 0.437, "eval_steps_per_second": 0.056, "step": 450 }, { "epoch": 7.0, "grad_norm": 45.95404815673828, "learning_rate": 7.2794040652413374e-06, "loss": 7.6029, "step": 525 }, { "epoch": 7.0, "eval_classes": 0, "eval_loss": 7.830236434936523, "eval_map": 0.7562, "eval_map_50": 0.9707, "eval_map_75": 0.8933, "eval_map_large": 0.9909, "eval_map_medium": 0.7925, "eval_map_per_class": 0.7562, "eval_map_small": 0.5965, "eval_mar_1": 0.0415, "eval_mar_10": 0.3969, "eval_mar_100": 0.8294, "eval_mar_100_per_class": 0.8294, "eval_mar_large": 0.9926, "eval_mar_medium": 0.8603, "eval_mar_small": 0.68, "eval_runtime": 302.0489, "eval_samples_per_second": 0.437, "eval_steps_per_second": 0.056, "step": 525 }, { "epoch": 8.0, "grad_norm": 35.335750579833984, "learning_rate": 6.555175510922047e-06, "loss": 7.3823, "step": 600 }, { "epoch": 8.0, "eval_classes": 0, "eval_loss": 7.547567844390869, "eval_map": 0.756, "eval_map_50": 0.9689, "eval_map_75": 0.8892, "eval_map_large": 0.9908, "eval_map_medium": 0.792, "eval_map_per_class": 0.756, "eval_map_small": 0.5973, "eval_mar_1": 0.0412, "eval_mar_10": 0.3982, "eval_mar_100": 0.8315, "eval_mar_100_per_class": 0.8315, "eval_mar_large": 0.9926, "eval_mar_medium": 0.8626, "eval_mar_small": 0.6809, "eval_runtime": 302.4595, "eval_samples_per_second": 0.436, "eval_steps_per_second": 0.056, "step": 600 }, { "epoch": 9.0, "grad_norm": 16.641401290893555, "learning_rate": 5.782172325201155e-06, "loss": 7.2456, "step": 675 }, { "epoch": 9.0, "eval_classes": 0, "eval_loss": 7.461709499359131, "eval_map": 0.7611, "eval_map_50": 0.969, "eval_map_75": 0.8979, "eval_map_large": 0.9902, "eval_map_medium": 0.795, "eval_map_per_class": 0.7611, "eval_map_small": 0.6055, "eval_mar_1": 0.0411, "eval_mar_10": 0.3995, "eval_mar_100": 0.8338, "eval_mar_100_per_class": 0.8338, "eval_mar_large": 0.9926, "eval_mar_medium": 0.8642, "eval_mar_small": 0.6865, "eval_runtime": 302.0761, "eval_samples_per_second": 0.437, "eval_steps_per_second": 0.056, "step": 675 }, { "epoch": 10.0, "grad_norm": 18.994029998779297, "learning_rate": 4.989386519275895e-06, "loss": 7.068, "step": 750 }, { "epoch": 10.0, "eval_classes": 0, "eval_loss": 7.323790550231934, "eval_map": 0.7627, "eval_map_50": 0.9688, "eval_map_75": 0.8988, "eval_map_large": 0.9901, "eval_map_medium": 0.7964, "eval_map_per_class": 0.7627, "eval_map_small": 0.6028, "eval_mar_1": 0.0413, "eval_mar_10": 0.3995, "eval_mar_100": 0.8347, "eval_mar_100_per_class": 0.8347, "eval_mar_large": 0.9938, "eval_mar_medium": 0.8656, "eval_mar_small": 0.685, "eval_runtime": 304.0925, "eval_samples_per_second": 0.434, "eval_steps_per_second": 0.056, "step": 750 }, { "epoch": 11.0, "grad_norm": 17.86629295349121, "learning_rate": 4.196869148389114e-06, "loss": 7.0105, "step": 825 }, { "epoch": 11.0, "eval_classes": 0, "eval_loss": 7.247533798217773, "eval_map": 0.7655, "eval_map_50": 0.9703, "eval_map_75": 0.8992, "eval_map_large": 0.991, "eval_map_medium": 0.7982, "eval_map_per_class": 0.7655, "eval_map_small": 0.6099, "eval_mar_1": 0.0409, "eval_mar_10": 0.4007, "eval_mar_100": 0.8372, "eval_mar_100_per_class": 0.8372, "eval_mar_large": 0.9938, "eval_mar_medium": 0.8679, "eval_mar_small": 0.6893, "eval_runtime": 302.7002, "eval_samples_per_second": 0.436, "eval_steps_per_second": 0.056, "step": 825 }, { "epoch": 12.0, "grad_norm": 10.187027931213379, "learning_rate": 3.424664478552887e-06, "loss": 6.9651, "step": 900 }, { "epoch": 12.0, "eval_classes": 0, "eval_loss": 7.234455108642578, "eval_map": 0.7679, "eval_map_50": 0.9709, "eval_map_75": 0.9045, "eval_map_large": 0.9899, "eval_map_medium": 0.8008, "eval_map_per_class": 0.7679, "eval_map_small": 0.6147, "eval_mar_1": 0.0412, "eval_mar_10": 0.3999, "eval_mar_100": 0.8393, "eval_mar_100_per_class": 0.8393, "eval_mar_large": 0.9938, "eval_mar_medium": 0.8692, "eval_mar_small": 0.6944, "eval_runtime": 307.3877, "eval_samples_per_second": 0.429, "eval_steps_per_second": 0.055, "step": 900 }, { "epoch": 13.0, "grad_norm": 24.03527069091797, "learning_rate": 2.6923030290801817e-06, "loss": 6.8849, "step": 975 }, { "epoch": 13.0, "eval_classes": 0, "eval_loss": 7.041961669921875, "eval_map": 0.764, "eval_map_50": 0.9698, "eval_map_75": 0.8996, "eval_map_large": 0.9899, "eval_map_medium": 0.7977, "eval_map_per_class": 0.764, "eval_map_small": 0.6091, "eval_mar_1": 0.0405, "eval_mar_10": 0.3996, "eval_mar_100": 0.8377, "eval_mar_100_per_class": 0.8377, "eval_mar_large": 0.9938, "eval_mar_medium": 0.8677, "eval_mar_small": 0.692, "eval_runtime": 302.0813, "eval_samples_per_second": 0.437, "eval_steps_per_second": 0.056, "step": 975 }, { "epoch": 14.0, "grad_norm": 25.183696746826172, "learning_rate": 2.0183076087440044e-06, "loss": 6.8559, "step": 1050 }, { "epoch": 14.0, "eval_classes": 0, "eval_loss": 7.071371555328369, "eval_map": 0.7643, "eval_map_50": 0.9693, "eval_map_75": 0.8986, "eval_map_large": 0.9899, "eval_map_medium": 0.7991, "eval_map_per_class": 0.7643, "eval_map_small": 0.6084, "eval_mar_1": 0.0413, "eval_mar_10": 0.3994, "eval_mar_100": 0.8387, "eval_mar_100_per_class": 0.8387, "eval_mar_large": 0.9938, "eval_mar_medium": 0.8693, "eval_mar_small": 0.6913, "eval_runtime": 302.1345, "eval_samples_per_second": 0.437, "eval_steps_per_second": 0.056, "step": 1050 }, { "epoch": 15.0, "grad_norm": 26.083616256713867, "learning_rate": 1.4197248388455693e-06, "loss": 6.7735, "step": 1125 }, { "epoch": 15.0, "eval_classes": 0, "eval_loss": 6.997360706329346, "eval_map": 0.7671, "eval_map_50": 0.9702, "eval_map_75": 0.9007, "eval_map_large": 0.9908, "eval_map_medium": 0.8014, "eval_map_per_class": 0.7671, "eval_map_small": 0.6119, "eval_mar_1": 0.0413, "eval_mar_10": 0.3998, "eval_mar_100": 0.8395, "eval_mar_100_per_class": 0.8395, "eval_mar_large": 0.9938, "eval_mar_medium": 0.8706, "eval_mar_small": 0.6902, "eval_runtime": 302.6848, "eval_samples_per_second": 0.436, "eval_steps_per_second": 0.056, "step": 1125 }, { "epoch": 16.0, "grad_norm": 25.00279998779297, "learning_rate": 9.116940118607792e-07, "loss": 6.7596, "step": 1200 }, { "epoch": 16.0, "eval_classes": 0, "eval_loss": 6.9286627769470215, "eval_map": 0.7675, "eval_map_50": 0.9702, "eval_map_75": 0.9041, "eval_map_large": 0.991, "eval_map_medium": 0.8007, "eval_map_per_class": 0.7675, "eval_map_small": 0.614, "eval_mar_1": 0.0406, "eval_mar_10": 0.399, "eval_mar_100": 0.8393, "eval_mar_100_per_class": 0.8393, "eval_mar_large": 0.9938, "eval_mar_medium": 0.8696, "eval_mar_small": 0.6931, "eval_runtime": 302.6539, "eval_samples_per_second": 0.436, "eval_steps_per_second": 0.056, "step": 1200 }, { "epoch": 17.0, "grad_norm": 29.690765380859375, "learning_rate": 5.070641900468149e-07, "loss": 6.7191, "step": 1275 }, { "epoch": 17.0, "eval_classes": 0, "eval_loss": 7.00290584564209, "eval_map": 0.7678, "eval_map_50": 0.9701, "eval_map_75": 0.9039, "eval_map_large": 0.991, "eval_map_medium": 0.8016, "eval_map_per_class": 0.7678, "eval_map_small": 0.61, "eval_mar_1": 0.0407, "eval_mar_10": 0.3991, "eval_mar_100": 0.8407, "eval_mar_100_per_class": 0.8407, "eval_mar_large": 0.9938, "eval_mar_medium": 0.8711, "eval_mar_small": 0.6943, "eval_runtime": 302.683, "eval_samples_per_second": 0.436, "eval_steps_per_second": 0.056, "step": 1275 }, { "epoch": 18.0, "grad_norm": 10.844990730285645, "learning_rate": 2.1606922831058198e-07, "loss": 6.6811, "step": 1350 }, { "epoch": 18.0, "eval_classes": 0, "eval_loss": 6.940825462341309, "eval_map": 0.7678, "eval_map_50": 0.97, "eval_map_75": 0.901, "eval_map_large": 0.991, "eval_map_medium": 0.8014, "eval_map_per_class": 0.7678, "eval_map_small": 0.6121, "eval_mar_1": 0.041, "eval_mar_10": 0.4008, "eval_mar_100": 0.84, "eval_mar_100_per_class": 0.84, "eval_mar_large": 0.9938, "eval_mar_medium": 0.8705, "eval_mar_small": 0.6928, "eval_runtime": 302.7462, "eval_samples_per_second": 0.436, "eval_steps_per_second": 0.056, "step": 1350 }, { "epoch": 19.0, "grad_norm": 35.2485237121582, "learning_rate": 4.60689406264897e-08, "loss": 6.6732, "step": 1425 }, { "epoch": 19.0, "eval_classes": 0, "eval_loss": 6.960495471954346, "eval_map": 0.7679, "eval_map_50": 0.9696, "eval_map_75": 0.9012, "eval_map_large": 0.9911, "eval_map_medium": 0.801, "eval_map_per_class": 0.7679, "eval_map_small": 0.6142, "eval_mar_1": 0.0413, "eval_mar_10": 0.4005, "eval_mar_100": 0.8405, "eval_mar_100_per_class": 0.8405, "eval_mar_large": 0.9938, "eval_mar_medium": 0.871, "eval_mar_small": 0.6935, "eval_runtime": 302.8692, "eval_samples_per_second": 0.436, "eval_steps_per_second": 0.056, "step": 1425 }, { "epoch": 19.74074074074074, "grad_norm": 524.6016845703125, "learning_rate": 9.124060015425206e-10, "loss": 6.7621, "step": 1480 }, { "epoch": 19.74074074074074, "eval_classes": 0, "eval_loss": 6.952253341674805, "eval_map": 0.7683, "eval_map_50": 0.9698, "eval_map_75": 0.9012, "eval_map_large": 0.9911, "eval_map_medium": 0.8012, "eval_map_per_class": 0.7683, "eval_map_small": 0.6166, "eval_mar_1": 0.0414, "eval_mar_10": 0.3998, "eval_mar_100": 0.8409, "eval_mar_100_per_class": 0.8409, "eval_mar_large": 0.9938, "eval_mar_medium": 0.8711, "eval_mar_small": 0.6954, "eval_runtime": 304.7146, "eval_samples_per_second": 0.433, "eval_steps_per_second": 0.056, "step": 1480 }, { "epoch": 19.74074074074074, "step": 1480, "total_flos": 5.235456347568341e+18, "train_loss": 8.241098208040805, "train_runtime": 50547.6535, "train_samples_per_second": 0.47, "train_steps_per_second": 0.029 } ], "logging_steps": 500, "max_steps": 1480, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.235456347568341e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }