detr-resnet-50_swny / config.json
rathi2023's picture
Training in progress, step 1000
58011b3 verified
{
"_name_or_path": "facebook/detr-resnet-50",
"activation_dropout": 0.0,
"activation_function": "relu",
"architectures": [
"DetrForObjectDetection"
],
"attention_dropout": 0.0,
"auxiliary_loss": false,
"backbone": "resnet50",
"backbone_config": null,
"backbone_kwargs": {
"in_chans": 3,
"out_indices": [
1,
2,
3,
4
]
},
"bbox_cost": 5,
"bbox_loss_coefficient": 5,
"class_cost": 1,
"classifier_dropout": 0.0,
"d_model": 256,
"decoder_attention_heads": 8,
"decoder_ffn_dim": 2048,
"decoder_layerdrop": 0.0,
"decoder_layers": 6,
"dice_loss_coefficient": 1,
"dilation": false,
"dropout": 0.1,
"encoder_attention_heads": 8,
"encoder_ffn_dim": 2048,
"encoder_layerdrop": 0.0,
"encoder_layers": 6,
"eos_coefficient": 0.1,
"giou_cost": 2,
"giou_loss_coefficient": 2,
"id2label": {
"1": 81,
"2": 80,
"3": 78,
"4": 26,
"5": 27,
"6": 67,
"7": 106,
"8": 107,
"9": 2,
"10": 14,
"11": 63,
"12": 64,
"13": 1,
"14": 63,
"15": 10,
"16": 17,
"17": 15,
"18": 12,
"19": 101,
"20": 100,
"21": 102,
"22": 52,
"23": 15,
"24": 12,
"25": 66,
"26": 63,
"27": 63,
"28": 54,
"29": 55,
"30": 56,
"31": 87,
"32": 26,
"33": 87,
"34": 88,
"35": 90,
"36": 91,
"37": 103,
"38": 104,
"39": 105,
"40": 92,
"41": 51,
"42": 22,
"43": 20,
"44": 19,
"45": 57,
"46": 58,
"47": 59,
"48": 60,
"49": 98,
"50": 99,
"51": 7,
"52": 9,
"53": 10,
"54": 41,
"55": 42,
"56": 32,
"57": 33,
"58": 28,
"59": 30,
"60": 35,
"61": 34,
"62": 8,
"63": 14,
"64": 16,
"65": 51,
"66": 74,
"67": 72,
"68": 73,
"69": 61,
"70": 62,
"71": 53,
"72": 28,
"73": 29,
"74": 39,
"75": 40,
"76": 106,
"77": 107,
"78": 2,
"79": 83,
"80": 70,
"81": 69,
"82": 87,
"83": 89,
"84": 49,
"85": 70,
"86": 71,
"87": 76,
"88": 77,
"89": 72,
"90": 74,
"91": 75,
"92": 101,
"93": 100,
"94": 102,
"95": 6,
"96": 35,
"97": 34,
"98": 83,
"99": 21,
"100": 25,
"101": 23,
"102": 24,
"103": 53,
"104": 31,
"105": 18,
"106": 11,
"107": 13,
"108": 20,
"109": 19,
"110": 84,
"111": 92,
"112": 93,
"113": 20,
"114": 19,
"115": 36,
"116": 37,
"117": 38,
"118": 57,
"119": 58,
"120": 44,
"121": 45,
"122": 46,
"123": 95,
"124": 96,
"125": 23,
"126": 24,
"127": 61,
"128": 62,
"129": 4,
"130": 67,
"131": 74,
"132": 7,
"133": 97,
"134": 98,
"135": 99,
"136": 47,
"137": 48,
"138": 32,
"139": 53,
"140": 94,
"141": 92,
"142": 85,
"143": 86,
"144": 8,
"145": 68,
"146": 3,
"147": 82,
"148": 81,
"149": 80,
"150": 78,
"151": 79,
"152": 97,
"153": 98,
"154": 99,
"155": 50,
"156": 28,
"157": 29,
"158": 42,
"159": 43
},
"init_std": 0.02,
"init_xavier_std": 1.0,
"is_encoder_decoder": true,
"label2id": {
"1": 13,
"2": 78,
"3": 146,
"4": 129,
"6": 95,
"7": 132,
"8": 144,
"9": 52,
"10": 53,
"11": 106,
"12": 24,
"13": 107,
"14": 63,
"15": 23,
"16": 64,
"17": 16,
"18": 105,
"19": 114,
"20": 113,
"21": 99,
"22": 42,
"23": 125,
"24": 126,
"25": 100,
"26": 32,
"27": 5,
"28": 156,
"29": 157,
"30": 59,
"31": 104,
"32": 138,
"33": 57,
"34": 97,
"35": 96,
"36": 115,
"37": 116,
"38": 117,
"39": 74,
"40": 75,
"41": 54,
"42": 158,
"43": 159,
"44": 120,
"45": 121,
"46": 122,
"47": 136,
"48": 137,
"49": 84,
"50": 155,
"51": 65,
"52": 22,
"53": 139,
"54": 28,
"55": 29,
"56": 30,
"57": 118,
"58": 119,
"59": 47,
"60": 48,
"61": 127,
"62": 128,
"63": 27,
"64": 12,
"66": 25,
"67": 130,
"68": 145,
"69": 81,
"70": 85,
"71": 86,
"72": 89,
"73": 68,
"74": 131,
"75": 91,
"76": 87,
"77": 88,
"78": 150,
"79": 151,
"80": 149,
"81": 148,
"82": 147,
"83": 98,
"84": 110,
"85": 142,
"86": 143,
"87": 82,
"88": 34,
"89": 83,
"90": 35,
"91": 36,
"92": 141,
"93": 112,
"94": 140,
"95": 123,
"96": 124,
"97": 152,
"98": 153,
"99": 154,
"100": 93,
"101": 92,
"102": 94,
"103": 37,
"104": 38,
"105": 39,
"106": 76,
"107": 77
},
"mask_loss_coefficient": 1,
"max_position_embeddings": 1024,
"model_type": "detr",
"num_channels": 3,
"num_hidden_layers": 6,
"num_queries": 100,
"position_embedding_type": "sine",
"scale_embedding": false,
"torch_dtype": "float32",
"transformers_version": "4.41.1",
"use_pretrained_backbone": true,
"use_timm_backbone": true
}