|
{ |
|
"best_metric": 28.4991512298584, |
|
"best_model_checkpoint": "mask2former/checkpoint-2000", |
|
"epoch": 6.197183098591549, |
|
"eval_steps": 50, |
|
"global_step": 2200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14084507042253522, |
|
"grad_norm": 227.8921661376953, |
|
"learning_rate": 4.9935203966644134e-05, |
|
"loss": 50.7018, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14084507042253522, |
|
"eval_loss": 44.24353790283203, |
|
"eval_runtime": 135.6148, |
|
"eval_samples_per_second": 9.306, |
|
"eval_steps_per_second": 1.165, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.28169014084507044, |
|
"grad_norm": 232.9764404296875, |
|
"learning_rate": 4.986477349560514e-05, |
|
"loss": 40.5877, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28169014084507044, |
|
"eval_loss": 39.646488189697266, |
|
"eval_runtime": 119.1324, |
|
"eval_samples_per_second": 10.593, |
|
"eval_steps_per_second": 1.326, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4225352112676056, |
|
"grad_norm": 375.6308898925781, |
|
"learning_rate": 4.979434302456615e-05, |
|
"loss": 37.4102, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4225352112676056, |
|
"eval_loss": 37.247108459472656, |
|
"eval_runtime": 119.6132, |
|
"eval_samples_per_second": 10.551, |
|
"eval_steps_per_second": 1.321, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5633802816901409, |
|
"grad_norm": 189.58392333984375, |
|
"learning_rate": 4.972391255352716e-05, |
|
"loss": 35.7502, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5633802816901409, |
|
"eval_loss": 36.34551239013672, |
|
"eval_runtime": 132.8881, |
|
"eval_samples_per_second": 9.497, |
|
"eval_steps_per_second": 1.189, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.704225352112676, |
|
"grad_norm": 163.74330139160156, |
|
"learning_rate": 4.965348208248817e-05, |
|
"loss": 34.7067, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.704225352112676, |
|
"eval_loss": 34.88238525390625, |
|
"eval_runtime": 120.2665, |
|
"eval_samples_per_second": 10.493, |
|
"eval_steps_per_second": 1.314, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.8450704225352113, |
|
"grad_norm": 173.85494995117188, |
|
"learning_rate": 4.958305161144918e-05, |
|
"loss": 34.0798, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8450704225352113, |
|
"eval_loss": 34.85204315185547, |
|
"eval_runtime": 129.3353, |
|
"eval_samples_per_second": 9.758, |
|
"eval_steps_per_second": 1.222, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.9859154929577465, |
|
"grad_norm": 167.4436492919922, |
|
"learning_rate": 4.951262114041019e-05, |
|
"loss": 33.3503, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9859154929577465, |
|
"eval_loss": 33.73210144042969, |
|
"eval_runtime": 143.3502, |
|
"eval_samples_per_second": 8.804, |
|
"eval_steps_per_second": 1.102, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.1267605633802817, |
|
"grad_norm": 202.66012573242188, |
|
"learning_rate": 4.944219066937119e-05, |
|
"loss": 32.3436, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.1267605633802817, |
|
"eval_loss": 33.15604782104492, |
|
"eval_runtime": 123.8826, |
|
"eval_samples_per_second": 10.187, |
|
"eval_steps_per_second": 1.275, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.267605633802817, |
|
"grad_norm": 145.71310424804688, |
|
"learning_rate": 4.937176019833221e-05, |
|
"loss": 32.3845, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.267605633802817, |
|
"eval_loss": 33.041107177734375, |
|
"eval_runtime": 125.7421, |
|
"eval_samples_per_second": 10.036, |
|
"eval_steps_per_second": 1.257, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.408450704225352, |
|
"grad_norm": 143.16452026367188, |
|
"learning_rate": 4.930132972729321e-05, |
|
"loss": 30.8809, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.408450704225352, |
|
"eval_loss": 32.785240173339844, |
|
"eval_runtime": 137.1907, |
|
"eval_samples_per_second": 9.199, |
|
"eval_steps_per_second": 1.152, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.5492957746478875, |
|
"grad_norm": 159.82777404785156, |
|
"learning_rate": 4.9230899256254227e-05, |
|
"loss": 31.689, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.5492957746478875, |
|
"eval_loss": 31.99137306213379, |
|
"eval_runtime": 129.225, |
|
"eval_samples_per_second": 9.766, |
|
"eval_steps_per_second": 1.223, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.6901408450704225, |
|
"grad_norm": 160.78164672851562, |
|
"learning_rate": 4.916046878521523e-05, |
|
"loss": 31.036, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.6901408450704225, |
|
"eval_loss": 32.72974395751953, |
|
"eval_runtime": 129.162, |
|
"eval_samples_per_second": 9.771, |
|
"eval_steps_per_second": 1.223, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.8309859154929577, |
|
"grad_norm": 206.84974670410156, |
|
"learning_rate": 4.9090038314176246e-05, |
|
"loss": 30.9795, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.8309859154929577, |
|
"eval_loss": 31.88483238220215, |
|
"eval_runtime": 126.8688, |
|
"eval_samples_per_second": 9.947, |
|
"eval_steps_per_second": 1.245, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.971830985915493, |
|
"grad_norm": 128.7499237060547, |
|
"learning_rate": 4.901960784313725e-05, |
|
"loss": 30.7918, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.971830985915493, |
|
"eval_loss": 31.528514862060547, |
|
"eval_runtime": 131.1893, |
|
"eval_samples_per_second": 9.62, |
|
"eval_steps_per_second": 1.204, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.112676056338028, |
|
"grad_norm": 155.95223999023438, |
|
"learning_rate": 4.8949177372098266e-05, |
|
"loss": 30.1432, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.112676056338028, |
|
"eval_loss": 32.06336212158203, |
|
"eval_runtime": 130.1706, |
|
"eval_samples_per_second": 9.695, |
|
"eval_steps_per_second": 1.214, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.2535211267605635, |
|
"grad_norm": 122.61072540283203, |
|
"learning_rate": 4.887874690105927e-05, |
|
"loss": 29.7082, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.2535211267605635, |
|
"eval_loss": 31.184894561767578, |
|
"eval_runtime": 121.0117, |
|
"eval_samples_per_second": 10.429, |
|
"eval_steps_per_second": 1.306, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.3943661971830985, |
|
"grad_norm": 139.07225036621094, |
|
"learning_rate": 4.8808316430020286e-05, |
|
"loss": 28.7869, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.3943661971830985, |
|
"eval_loss": 30.902196884155273, |
|
"eval_runtime": 127.2634, |
|
"eval_samples_per_second": 9.916, |
|
"eval_steps_per_second": 1.242, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.535211267605634, |
|
"grad_norm": 188.21234130859375, |
|
"learning_rate": 4.873788595898129e-05, |
|
"loss": 29.4227, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.535211267605634, |
|
"eval_loss": 30.5902099609375, |
|
"eval_runtime": 146.3048, |
|
"eval_samples_per_second": 8.626, |
|
"eval_steps_per_second": 1.08, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.676056338028169, |
|
"grad_norm": 302.049560546875, |
|
"learning_rate": 4.8667455487942306e-05, |
|
"loss": 29.1865, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.676056338028169, |
|
"eval_loss": 30.381799697875977, |
|
"eval_runtime": 118.3226, |
|
"eval_samples_per_second": 10.666, |
|
"eval_steps_per_second": 1.335, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.816901408450704, |
|
"grad_norm": 151.5100860595703, |
|
"learning_rate": 4.859702501690331e-05, |
|
"loss": 29.2715, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.816901408450704, |
|
"eval_loss": 30.919567108154297, |
|
"eval_runtime": 128.7879, |
|
"eval_samples_per_second": 9.799, |
|
"eval_steps_per_second": 1.227, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.9577464788732395, |
|
"grad_norm": 135.14910888671875, |
|
"learning_rate": 4.8526594545864326e-05, |
|
"loss": 29.1941, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.9577464788732395, |
|
"eval_loss": 30.816268920898438, |
|
"eval_runtime": 127.4007, |
|
"eval_samples_per_second": 9.906, |
|
"eval_steps_per_second": 1.24, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 3.0985915492957745, |
|
"grad_norm": 121.41586303710938, |
|
"learning_rate": 4.845616407482533e-05, |
|
"loss": 28.5256, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.0985915492957745, |
|
"eval_loss": 30.472957611083984, |
|
"eval_runtime": 144.0632, |
|
"eval_samples_per_second": 8.76, |
|
"eval_steps_per_second": 1.097, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.23943661971831, |
|
"grad_norm": 217.75247192382812, |
|
"learning_rate": 4.8385733603786346e-05, |
|
"loss": 28.0419, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.23943661971831, |
|
"eval_loss": 30.653095245361328, |
|
"eval_runtime": 129.0443, |
|
"eval_samples_per_second": 9.78, |
|
"eval_steps_per_second": 1.224, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.380281690140845, |
|
"grad_norm": 129.21693420410156, |
|
"learning_rate": 4.831530313274735e-05, |
|
"loss": 28.0538, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.380281690140845, |
|
"eval_loss": 30.077850341796875, |
|
"eval_runtime": 119.4037, |
|
"eval_samples_per_second": 10.569, |
|
"eval_steps_per_second": 1.323, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.52112676056338, |
|
"grad_norm": 116.27620697021484, |
|
"learning_rate": 4.8244872661708365e-05, |
|
"loss": 27.9463, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.52112676056338, |
|
"eval_loss": 30.61139488220215, |
|
"eval_runtime": 132.5429, |
|
"eval_samples_per_second": 9.521, |
|
"eval_steps_per_second": 1.192, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.6619718309859155, |
|
"grad_norm": 149.399169921875, |
|
"learning_rate": 4.817444219066937e-05, |
|
"loss": 27.4152, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.6619718309859155, |
|
"eval_loss": 30.551870346069336, |
|
"eval_runtime": 130.4003, |
|
"eval_samples_per_second": 9.678, |
|
"eval_steps_per_second": 1.212, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.802816901408451, |
|
"grad_norm": 175.60769653320312, |
|
"learning_rate": 4.8104011719630385e-05, |
|
"loss": 27.7461, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.802816901408451, |
|
"eval_loss": 29.564067840576172, |
|
"eval_runtime": 131.8055, |
|
"eval_samples_per_second": 9.575, |
|
"eval_steps_per_second": 1.199, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.943661971830986, |
|
"grad_norm": 114.43487548828125, |
|
"learning_rate": 4.803358124859139e-05, |
|
"loss": 27.5604, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.943661971830986, |
|
"eval_loss": 30.12961769104004, |
|
"eval_runtime": 148.0714, |
|
"eval_samples_per_second": 8.523, |
|
"eval_steps_per_second": 1.067, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.084507042253521, |
|
"grad_norm": 189.4749298095703, |
|
"learning_rate": 4.79631507775524e-05, |
|
"loss": 27.381, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 4.084507042253521, |
|
"eval_loss": 30.50173568725586, |
|
"eval_runtime": 124.4448, |
|
"eval_samples_per_second": 10.141, |
|
"eval_steps_per_second": 1.27, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 4.225352112676056, |
|
"grad_norm": 129.88868713378906, |
|
"learning_rate": 4.789272030651341e-05, |
|
"loss": 26.3816, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.225352112676056, |
|
"eval_loss": 29.6898193359375, |
|
"eval_runtime": 129.216, |
|
"eval_samples_per_second": 9.767, |
|
"eval_steps_per_second": 1.223, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.366197183098592, |
|
"grad_norm": 138.11952209472656, |
|
"learning_rate": 4.782228983547442e-05, |
|
"loss": 26.5218, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 4.366197183098592, |
|
"eval_loss": 29.94746971130371, |
|
"eval_runtime": 132.27, |
|
"eval_samples_per_second": 9.541, |
|
"eval_steps_per_second": 1.195, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 4.507042253521127, |
|
"grad_norm": 185.40530395507812, |
|
"learning_rate": 4.775185936443543e-05, |
|
"loss": 26.9798, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.507042253521127, |
|
"eval_loss": 29.332275390625, |
|
"eval_runtime": 137.9492, |
|
"eval_samples_per_second": 9.148, |
|
"eval_steps_per_second": 1.145, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.647887323943662, |
|
"grad_norm": 135.86349487304688, |
|
"learning_rate": 4.768142889339644e-05, |
|
"loss": 26.8186, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 4.647887323943662, |
|
"eval_loss": 29.575531005859375, |
|
"eval_runtime": 135.1713, |
|
"eval_samples_per_second": 9.336, |
|
"eval_steps_per_second": 1.169, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 4.788732394366197, |
|
"grad_norm": 153.70196533203125, |
|
"learning_rate": 4.761099842235745e-05, |
|
"loss": 27.5111, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.788732394366197, |
|
"eval_loss": 30.7945499420166, |
|
"eval_runtime": 117.37, |
|
"eval_samples_per_second": 10.752, |
|
"eval_steps_per_second": 1.346, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.929577464788732, |
|
"grad_norm": 150.87384033203125, |
|
"learning_rate": 4.754056795131846e-05, |
|
"loss": 27.0839, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 4.929577464788732, |
|
"eval_loss": 29.414661407470703, |
|
"eval_runtime": 120.7787, |
|
"eval_samples_per_second": 10.449, |
|
"eval_steps_per_second": 1.308, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 5.070422535211268, |
|
"grad_norm": 140.32867431640625, |
|
"learning_rate": 4.747013748027947e-05, |
|
"loss": 26.6393, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.070422535211268, |
|
"eval_loss": 28.79827880859375, |
|
"eval_runtime": 126.6565, |
|
"eval_samples_per_second": 9.964, |
|
"eval_steps_per_second": 1.247, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.211267605633803, |
|
"grad_norm": 105.2396469116211, |
|
"learning_rate": 4.739970700924048e-05, |
|
"loss": 26.3564, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 5.211267605633803, |
|
"eval_loss": 29.22454261779785, |
|
"eval_runtime": 138.64, |
|
"eval_samples_per_second": 9.103, |
|
"eval_steps_per_second": 1.14, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 5.352112676056338, |
|
"grad_norm": 158.32907104492188, |
|
"learning_rate": 4.732927653820149e-05, |
|
"loss": 25.6174, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.352112676056338, |
|
"eval_loss": 28.933706283569336, |
|
"eval_runtime": 124.4051, |
|
"eval_samples_per_second": 10.144, |
|
"eval_steps_per_second": 1.27, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.492957746478873, |
|
"grad_norm": 209.70982360839844, |
|
"learning_rate": 4.72588460671625e-05, |
|
"loss": 25.8777, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 5.492957746478873, |
|
"eval_loss": 29.477840423583984, |
|
"eval_runtime": 138.0785, |
|
"eval_samples_per_second": 9.14, |
|
"eval_steps_per_second": 1.144, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 5.633802816901408, |
|
"grad_norm": 172.13571166992188, |
|
"learning_rate": 4.718841559612351e-05, |
|
"loss": 25.6848, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.633802816901408, |
|
"eval_loss": 28.4991512298584, |
|
"eval_runtime": 154.121, |
|
"eval_samples_per_second": 8.188, |
|
"eval_steps_per_second": 1.025, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.774647887323944, |
|
"grad_norm": 133.943115234375, |
|
"learning_rate": 4.711798512508452e-05, |
|
"loss": 26.4625, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 5.774647887323944, |
|
"eval_loss": 29.618194580078125, |
|
"eval_runtime": 118.4654, |
|
"eval_samples_per_second": 10.653, |
|
"eval_steps_per_second": 1.334, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 5.915492957746479, |
|
"grad_norm": 143.0037078857422, |
|
"learning_rate": 4.704755465404553e-05, |
|
"loss": 26.8448, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 5.915492957746479, |
|
"eval_loss": 29.537738800048828, |
|
"eval_runtime": 144.9122, |
|
"eval_samples_per_second": 8.709, |
|
"eval_steps_per_second": 1.09, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.056338028169014, |
|
"grad_norm": 127.10630798339844, |
|
"learning_rate": 4.697712418300654e-05, |
|
"loss": 26.0681, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 6.056338028169014, |
|
"eval_loss": 29.239002227783203, |
|
"eval_runtime": 131.0446, |
|
"eval_samples_per_second": 9.63, |
|
"eval_steps_per_second": 1.206, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 6.197183098591549, |
|
"grad_norm": 142.51170349121094, |
|
"learning_rate": 4.690669371196755e-05, |
|
"loss": 25.628, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.197183098591549, |
|
"eval_loss": 29.1112003326416, |
|
"eval_runtime": 119.9659, |
|
"eval_samples_per_second": 10.52, |
|
"eval_steps_per_second": 1.317, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.197183098591549, |
|
"step": 2200, |
|
"total_flos": 8.850907754333798e+18, |
|
"train_loss": 29.80798134543679, |
|
"train_runtime": 17574.1061, |
|
"train_samples_per_second": 64.612, |
|
"train_steps_per_second": 2.02 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 35500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 4, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 4 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.850907754333798e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|