File size: 3,394 Bytes
c7f0cc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Model configuration for the 'DETR4seg' detector: a ResNet backbone plus a
# 'detr4segHead' that wraps a 6-layer transformer encoder and a 6-layer
# transformer decoder, with Hungarian assignment settings for training.
# NOTE(review): the string `type` values are presumably resolved through the
# project's registries -- confirm the custom names ('DETR4seg',
# 'detr4segHead', 'DiceLoss') are registered before use.
model = dict(
    type='DETR4seg',
    # ---- backbone -------------------------------------------------------
    # ResNet with depth 101, initialised from the torchvision checkpoint.
    # BN layers are configured with requires_grad=False and norm_eval=True.
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='torchvision://resnet101',
        ),
    ),
    # ---- head -----------------------------------------------------------
    bbox_head=dict(
        type='detr4segHead',
        num_classes=80,
        in_channels=2048,
        transformer=dict(
            type='Transformer',
            # Encoder layers: self-attention followed by an FFN, each
            # followed by a norm (see operation_order).
            encoder=dict(
                type='DetrTransformerEncoder',
                num_layers=6,
                transformerlayers=dict(
                    type='BaseTransformerLayer',
                    # Kept as a one-element list, unlike the decoder below,
                    # which passes a single dict.
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.1,
                        ),
                    ],
                    feedforward_channels=2048,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'ffn', 'norm'),
                ),
            ),
            # Decoder layers: self-attention, cross-attention and an FFN,
            # each followed by a norm (see operation_order).
            decoder=dict(
                type='DetrTransformerDecoder',
                return_intermediate=True,
                num_layers=6,
                transformerlayers=dict(
                    type='DetrTransformerDecoderLayer',
                    attn_cfgs=dict(
                        type='MultiheadAttention',
                        embed_dims=256,
                        num_heads=8,
                        dropout=0.1,
                    ),
                    feedforward_channels=2048,
                    ffn_dropout=0.1,
                    operation_order=(
                        'self_attn',
                        'norm',
                        'cross_attn',
                        'norm',
                        'ffn',
                        'norm',
                    ),
                ),
            ),
        ),
        positional_encoding=dict(
            type='SinePositionalEncoding',
            num_feats=128,
            normalize=True,
        ),
        # Loss terms and their weights: classification 1.0, L1 box 5.0,
        # GIoU 2.0, dice 1.0.
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=1.0,
            class_weight=1.0,
        ),
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0),
        dice_loss=dict(type='DiceLoss', loss_weight=1.0),
    ),
    # ---- training and testing settings ------------------------------------
    # The assigner cost weights (1.0 / 5.0 / 2.0) mirror the cls / bbox / iou
    # loss weights configured on the head above.
    train_cfg=dict(
        assigner=dict(
            type='HungarianAssigner',
            cls_cost=dict(type='ClassificationCost', weight=1.0),
            reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
            iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0),
        ),
    ),
    test_cfg=dict(max_per_img=100),
)