mask2former / trainer_state.json
praneethd7's picture
UTEL-UIUC/ft-aug-mask2former-swin-small-ade-semantic
101722a verified
{
"best_metric": 28.4991512298584,
"best_model_checkpoint": "mask2former/checkpoint-2000",
"epoch": 6.197183098591549,
"eval_steps": 50,
"global_step": 2200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14084507042253522,
"grad_norm": 227.8921661376953,
"learning_rate": 4.9935203966644134e-05,
"loss": 50.7018,
"step": 50
},
{
"epoch": 0.14084507042253522,
"eval_loss": 44.24353790283203,
"eval_runtime": 135.6148,
"eval_samples_per_second": 9.306,
"eval_steps_per_second": 1.165,
"step": 50
},
{
"epoch": 0.28169014084507044,
"grad_norm": 232.9764404296875,
"learning_rate": 4.986477349560514e-05,
"loss": 40.5877,
"step": 100
},
{
"epoch": 0.28169014084507044,
"eval_loss": 39.646488189697266,
"eval_runtime": 119.1324,
"eval_samples_per_second": 10.593,
"eval_steps_per_second": 1.326,
"step": 100
},
{
"epoch": 0.4225352112676056,
"grad_norm": 375.6308898925781,
"learning_rate": 4.979434302456615e-05,
"loss": 37.4102,
"step": 150
},
{
"epoch": 0.4225352112676056,
"eval_loss": 37.247108459472656,
"eval_runtime": 119.6132,
"eval_samples_per_second": 10.551,
"eval_steps_per_second": 1.321,
"step": 150
},
{
"epoch": 0.5633802816901409,
"grad_norm": 189.58392333984375,
"learning_rate": 4.972391255352716e-05,
"loss": 35.7502,
"step": 200
},
{
"epoch": 0.5633802816901409,
"eval_loss": 36.34551239013672,
"eval_runtime": 132.8881,
"eval_samples_per_second": 9.497,
"eval_steps_per_second": 1.189,
"step": 200
},
{
"epoch": 0.704225352112676,
"grad_norm": 163.74330139160156,
"learning_rate": 4.965348208248817e-05,
"loss": 34.7067,
"step": 250
},
{
"epoch": 0.704225352112676,
"eval_loss": 34.88238525390625,
"eval_runtime": 120.2665,
"eval_samples_per_second": 10.493,
"eval_steps_per_second": 1.314,
"step": 250
},
{
"epoch": 0.8450704225352113,
"grad_norm": 173.85494995117188,
"learning_rate": 4.958305161144918e-05,
"loss": 34.0798,
"step": 300
},
{
"epoch": 0.8450704225352113,
"eval_loss": 34.85204315185547,
"eval_runtime": 129.3353,
"eval_samples_per_second": 9.758,
"eval_steps_per_second": 1.222,
"step": 300
},
{
"epoch": 0.9859154929577465,
"grad_norm": 167.4436492919922,
"learning_rate": 4.951262114041019e-05,
"loss": 33.3503,
"step": 350
},
{
"epoch": 0.9859154929577465,
"eval_loss": 33.73210144042969,
"eval_runtime": 143.3502,
"eval_samples_per_second": 8.804,
"eval_steps_per_second": 1.102,
"step": 350
},
{
"epoch": 1.1267605633802817,
"grad_norm": 202.66012573242188,
"learning_rate": 4.944219066937119e-05,
"loss": 32.3436,
"step": 400
},
{
"epoch": 1.1267605633802817,
"eval_loss": 33.15604782104492,
"eval_runtime": 123.8826,
"eval_samples_per_second": 10.187,
"eval_steps_per_second": 1.275,
"step": 400
},
{
"epoch": 1.267605633802817,
"grad_norm": 145.71310424804688,
"learning_rate": 4.937176019833221e-05,
"loss": 32.3845,
"step": 450
},
{
"epoch": 1.267605633802817,
"eval_loss": 33.041107177734375,
"eval_runtime": 125.7421,
"eval_samples_per_second": 10.036,
"eval_steps_per_second": 1.257,
"step": 450
},
{
"epoch": 1.408450704225352,
"grad_norm": 143.16452026367188,
"learning_rate": 4.930132972729321e-05,
"loss": 30.8809,
"step": 500
},
{
"epoch": 1.408450704225352,
"eval_loss": 32.785240173339844,
"eval_runtime": 137.1907,
"eval_samples_per_second": 9.199,
"eval_steps_per_second": 1.152,
"step": 500
},
{
"epoch": 1.5492957746478875,
"grad_norm": 159.82777404785156,
"learning_rate": 4.9230899256254227e-05,
"loss": 31.689,
"step": 550
},
{
"epoch": 1.5492957746478875,
"eval_loss": 31.99137306213379,
"eval_runtime": 129.225,
"eval_samples_per_second": 9.766,
"eval_steps_per_second": 1.223,
"step": 550
},
{
"epoch": 1.6901408450704225,
"grad_norm": 160.78164672851562,
"learning_rate": 4.916046878521523e-05,
"loss": 31.036,
"step": 600
},
{
"epoch": 1.6901408450704225,
"eval_loss": 32.72974395751953,
"eval_runtime": 129.162,
"eval_samples_per_second": 9.771,
"eval_steps_per_second": 1.223,
"step": 600
},
{
"epoch": 1.8309859154929577,
"grad_norm": 206.84974670410156,
"learning_rate": 4.9090038314176246e-05,
"loss": 30.9795,
"step": 650
},
{
"epoch": 1.8309859154929577,
"eval_loss": 31.88483238220215,
"eval_runtime": 126.8688,
"eval_samples_per_second": 9.947,
"eval_steps_per_second": 1.245,
"step": 650
},
{
"epoch": 1.971830985915493,
"grad_norm": 128.7499237060547,
"learning_rate": 4.901960784313725e-05,
"loss": 30.7918,
"step": 700
},
{
"epoch": 1.971830985915493,
"eval_loss": 31.528514862060547,
"eval_runtime": 131.1893,
"eval_samples_per_second": 9.62,
"eval_steps_per_second": 1.204,
"step": 700
},
{
"epoch": 2.112676056338028,
"grad_norm": 155.95223999023438,
"learning_rate": 4.8949177372098266e-05,
"loss": 30.1432,
"step": 750
},
{
"epoch": 2.112676056338028,
"eval_loss": 32.06336212158203,
"eval_runtime": 130.1706,
"eval_samples_per_second": 9.695,
"eval_steps_per_second": 1.214,
"step": 750
},
{
"epoch": 2.2535211267605635,
"grad_norm": 122.61072540283203,
"learning_rate": 4.887874690105927e-05,
"loss": 29.7082,
"step": 800
},
{
"epoch": 2.2535211267605635,
"eval_loss": 31.184894561767578,
"eval_runtime": 121.0117,
"eval_samples_per_second": 10.429,
"eval_steps_per_second": 1.306,
"step": 800
},
{
"epoch": 2.3943661971830985,
"grad_norm": 139.07225036621094,
"learning_rate": 4.8808316430020286e-05,
"loss": 28.7869,
"step": 850
},
{
"epoch": 2.3943661971830985,
"eval_loss": 30.902196884155273,
"eval_runtime": 127.2634,
"eval_samples_per_second": 9.916,
"eval_steps_per_second": 1.242,
"step": 850
},
{
"epoch": 2.535211267605634,
"grad_norm": 188.21234130859375,
"learning_rate": 4.873788595898129e-05,
"loss": 29.4227,
"step": 900
},
{
"epoch": 2.535211267605634,
"eval_loss": 30.5902099609375,
"eval_runtime": 146.3048,
"eval_samples_per_second": 8.626,
"eval_steps_per_second": 1.08,
"step": 900
},
{
"epoch": 2.676056338028169,
"grad_norm": 302.049560546875,
"learning_rate": 4.8667455487942306e-05,
"loss": 29.1865,
"step": 950
},
{
"epoch": 2.676056338028169,
"eval_loss": 30.381799697875977,
"eval_runtime": 118.3226,
"eval_samples_per_second": 10.666,
"eval_steps_per_second": 1.335,
"step": 950
},
{
"epoch": 2.816901408450704,
"grad_norm": 151.5100860595703,
"learning_rate": 4.859702501690331e-05,
"loss": 29.2715,
"step": 1000
},
{
"epoch": 2.816901408450704,
"eval_loss": 30.919567108154297,
"eval_runtime": 128.7879,
"eval_samples_per_second": 9.799,
"eval_steps_per_second": 1.227,
"step": 1000
},
{
"epoch": 2.9577464788732395,
"grad_norm": 135.14910888671875,
"learning_rate": 4.8526594545864326e-05,
"loss": 29.1941,
"step": 1050
},
{
"epoch": 2.9577464788732395,
"eval_loss": 30.816268920898438,
"eval_runtime": 127.4007,
"eval_samples_per_second": 9.906,
"eval_steps_per_second": 1.24,
"step": 1050
},
{
"epoch": 3.0985915492957745,
"grad_norm": 121.41586303710938,
"learning_rate": 4.845616407482533e-05,
"loss": 28.5256,
"step": 1100
},
{
"epoch": 3.0985915492957745,
"eval_loss": 30.472957611083984,
"eval_runtime": 144.0632,
"eval_samples_per_second": 8.76,
"eval_steps_per_second": 1.097,
"step": 1100
},
{
"epoch": 3.23943661971831,
"grad_norm": 217.75247192382812,
"learning_rate": 4.8385733603786346e-05,
"loss": 28.0419,
"step": 1150
},
{
"epoch": 3.23943661971831,
"eval_loss": 30.653095245361328,
"eval_runtime": 129.0443,
"eval_samples_per_second": 9.78,
"eval_steps_per_second": 1.224,
"step": 1150
},
{
"epoch": 3.380281690140845,
"grad_norm": 129.21693420410156,
"learning_rate": 4.831530313274735e-05,
"loss": 28.0538,
"step": 1200
},
{
"epoch": 3.380281690140845,
"eval_loss": 30.077850341796875,
"eval_runtime": 119.4037,
"eval_samples_per_second": 10.569,
"eval_steps_per_second": 1.323,
"step": 1200
},
{
"epoch": 3.52112676056338,
"grad_norm": 116.27620697021484,
"learning_rate": 4.8244872661708365e-05,
"loss": 27.9463,
"step": 1250
},
{
"epoch": 3.52112676056338,
"eval_loss": 30.61139488220215,
"eval_runtime": 132.5429,
"eval_samples_per_second": 9.521,
"eval_steps_per_second": 1.192,
"step": 1250
},
{
"epoch": 3.6619718309859155,
"grad_norm": 149.399169921875,
"learning_rate": 4.817444219066937e-05,
"loss": 27.4152,
"step": 1300
},
{
"epoch": 3.6619718309859155,
"eval_loss": 30.551870346069336,
"eval_runtime": 130.4003,
"eval_samples_per_second": 9.678,
"eval_steps_per_second": 1.212,
"step": 1300
},
{
"epoch": 3.802816901408451,
"grad_norm": 175.60769653320312,
"learning_rate": 4.8104011719630385e-05,
"loss": 27.7461,
"step": 1350
},
{
"epoch": 3.802816901408451,
"eval_loss": 29.564067840576172,
"eval_runtime": 131.8055,
"eval_samples_per_second": 9.575,
"eval_steps_per_second": 1.199,
"step": 1350
},
{
"epoch": 3.943661971830986,
"grad_norm": 114.43487548828125,
"learning_rate": 4.803358124859139e-05,
"loss": 27.5604,
"step": 1400
},
{
"epoch": 3.943661971830986,
"eval_loss": 30.12961769104004,
"eval_runtime": 148.0714,
"eval_samples_per_second": 8.523,
"eval_steps_per_second": 1.067,
"step": 1400
},
{
"epoch": 4.084507042253521,
"grad_norm": 189.4749298095703,
"learning_rate": 4.79631507775524e-05,
"loss": 27.381,
"step": 1450
},
{
"epoch": 4.084507042253521,
"eval_loss": 30.50173568725586,
"eval_runtime": 124.4448,
"eval_samples_per_second": 10.141,
"eval_steps_per_second": 1.27,
"step": 1450
},
{
"epoch": 4.225352112676056,
"grad_norm": 129.88868713378906,
"learning_rate": 4.789272030651341e-05,
"loss": 26.3816,
"step": 1500
},
{
"epoch": 4.225352112676056,
"eval_loss": 29.6898193359375,
"eval_runtime": 129.216,
"eval_samples_per_second": 9.767,
"eval_steps_per_second": 1.223,
"step": 1500
},
{
"epoch": 4.366197183098592,
"grad_norm": 138.11952209472656,
"learning_rate": 4.782228983547442e-05,
"loss": 26.5218,
"step": 1550
},
{
"epoch": 4.366197183098592,
"eval_loss": 29.94746971130371,
"eval_runtime": 132.27,
"eval_samples_per_second": 9.541,
"eval_steps_per_second": 1.195,
"step": 1550
},
{
"epoch": 4.507042253521127,
"grad_norm": 185.40530395507812,
"learning_rate": 4.775185936443543e-05,
"loss": 26.9798,
"step": 1600
},
{
"epoch": 4.507042253521127,
"eval_loss": 29.332275390625,
"eval_runtime": 137.9492,
"eval_samples_per_second": 9.148,
"eval_steps_per_second": 1.145,
"step": 1600
},
{
"epoch": 4.647887323943662,
"grad_norm": 135.86349487304688,
"learning_rate": 4.768142889339644e-05,
"loss": 26.8186,
"step": 1650
},
{
"epoch": 4.647887323943662,
"eval_loss": 29.575531005859375,
"eval_runtime": 135.1713,
"eval_samples_per_second": 9.336,
"eval_steps_per_second": 1.169,
"step": 1650
},
{
"epoch": 4.788732394366197,
"grad_norm": 153.70196533203125,
"learning_rate": 4.761099842235745e-05,
"loss": 27.5111,
"step": 1700
},
{
"epoch": 4.788732394366197,
"eval_loss": 30.7945499420166,
"eval_runtime": 117.37,
"eval_samples_per_second": 10.752,
"eval_steps_per_second": 1.346,
"step": 1700
},
{
"epoch": 4.929577464788732,
"grad_norm": 150.87384033203125,
"learning_rate": 4.754056795131846e-05,
"loss": 27.0839,
"step": 1750
},
{
"epoch": 4.929577464788732,
"eval_loss": 29.414661407470703,
"eval_runtime": 120.7787,
"eval_samples_per_second": 10.449,
"eval_steps_per_second": 1.308,
"step": 1750
},
{
"epoch": 5.070422535211268,
"grad_norm": 140.32867431640625,
"learning_rate": 4.747013748027947e-05,
"loss": 26.6393,
"step": 1800
},
{
"epoch": 5.070422535211268,
"eval_loss": 28.79827880859375,
"eval_runtime": 126.6565,
"eval_samples_per_second": 9.964,
"eval_steps_per_second": 1.247,
"step": 1800
},
{
"epoch": 5.211267605633803,
"grad_norm": 105.2396469116211,
"learning_rate": 4.739970700924048e-05,
"loss": 26.3564,
"step": 1850
},
{
"epoch": 5.211267605633803,
"eval_loss": 29.22454261779785,
"eval_runtime": 138.64,
"eval_samples_per_second": 9.103,
"eval_steps_per_second": 1.14,
"step": 1850
},
{
"epoch": 5.352112676056338,
"grad_norm": 158.32907104492188,
"learning_rate": 4.732927653820149e-05,
"loss": 25.6174,
"step": 1900
},
{
"epoch": 5.352112676056338,
"eval_loss": 28.933706283569336,
"eval_runtime": 124.4051,
"eval_samples_per_second": 10.144,
"eval_steps_per_second": 1.27,
"step": 1900
},
{
"epoch": 5.492957746478873,
"grad_norm": 209.70982360839844,
"learning_rate": 4.72588460671625e-05,
"loss": 25.8777,
"step": 1950
},
{
"epoch": 5.492957746478873,
"eval_loss": 29.477840423583984,
"eval_runtime": 138.0785,
"eval_samples_per_second": 9.14,
"eval_steps_per_second": 1.144,
"step": 1950
},
{
"epoch": 5.633802816901408,
"grad_norm": 172.13571166992188,
"learning_rate": 4.718841559612351e-05,
"loss": 25.6848,
"step": 2000
},
{
"epoch": 5.633802816901408,
"eval_loss": 28.4991512298584,
"eval_runtime": 154.121,
"eval_samples_per_second": 8.188,
"eval_steps_per_second": 1.025,
"step": 2000
},
{
"epoch": 5.774647887323944,
"grad_norm": 133.943115234375,
"learning_rate": 4.711798512508452e-05,
"loss": 26.4625,
"step": 2050
},
{
"epoch": 5.774647887323944,
"eval_loss": 29.618194580078125,
"eval_runtime": 118.4654,
"eval_samples_per_second": 10.653,
"eval_steps_per_second": 1.334,
"step": 2050
},
{
"epoch": 5.915492957746479,
"grad_norm": 143.0037078857422,
"learning_rate": 4.704755465404553e-05,
"loss": 26.8448,
"step": 2100
},
{
"epoch": 5.915492957746479,
"eval_loss": 29.537738800048828,
"eval_runtime": 144.9122,
"eval_samples_per_second": 8.709,
"eval_steps_per_second": 1.09,
"step": 2100
},
{
"epoch": 6.056338028169014,
"grad_norm": 127.10630798339844,
"learning_rate": 4.697712418300654e-05,
"loss": 26.0681,
"step": 2150
},
{
"epoch": 6.056338028169014,
"eval_loss": 29.239002227783203,
"eval_runtime": 131.0446,
"eval_samples_per_second": 9.63,
"eval_steps_per_second": 1.206,
"step": 2150
},
{
"epoch": 6.197183098591549,
"grad_norm": 142.51170349121094,
"learning_rate": 4.690669371196755e-05,
"loss": 25.628,
"step": 2200
},
{
"epoch": 6.197183098591549,
"eval_loss": 29.1112003326416,
"eval_runtime": 119.9659,
"eval_samples_per_second": 10.52,
"eval_steps_per_second": 1.317,
"step": 2200
},
{
"epoch": 6.197183098591549,
"step": 2200,
"total_flos": 8.850907754333798e+18,
"train_loss": 29.80798134543679,
"train_runtime": 17574.1061,
"train_samples_per_second": 64.612,
"train_steps_per_second": 2.02
}
],
"logging_steps": 50,
"max_steps": 35500,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 100,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 4,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 4
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.850907754333798e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}