# File size: 2,576 Bytes

from transformers.configuration_utils import PretrainedConfig

# Maps each supported backbone name to the width that ``SOLIDERConfig``
# exposes as ``vision_width``. ``SOLIDERConfig`` rejects any ``name`` that is
# not a key of this table.
BACKBONE_NAME2WIDTH = dict(
    (
        ("swin_tiny_patch4_window7_224", 768),
        ("swin_small_patch4_window7_224", 768),
        ("swin_base_patch4_window7_224", 1024),
        ("solider_tiny", 768),
        ("solider_small", 768),
        ("solider_base", 1024),
    )
)


class SOLIDERConfig(PretrainedConfig):
    """Configuration object for a SOLIDER (Swin-Transformer based) backbone.

    Every named constructor argument is stored on the instance under the
    same attribute name. The meaning of each hyperparameter is defined by the
    model code that consumes this config, which lives outside this module.

    In addition to the stored arguments, the following derived attributes are
    set. According to the original author's note they are informational only
    and are not meant to influence how the model is built:

    * ``img_size``: same value as ``pretrain_img_size``.
    * ``name``: the validated backbone name.
    * ``vision_width``: ``BACKBONE_NAME2WIDTH[name]``.
    * ``hidden_size``: same value as ``embed_dims``.

    Args:
        act_cfg: Stored as ``act_cfg``; shallow-copied when it is a dict so
            that instances never share the default dict object.
        norm_cfg: Stored as ``norm_cfg``; shallow-copied like ``act_cfg``.
        name: Backbone name; must be a key of ``BACKBONE_NAME2WIDTH``.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.

        All remaining arguments are stored verbatim.

    Raises:
        ValueError: If ``name`` is not a key of ``BACKBONE_NAME2WIDTH``.
    """

    model_type = "swin_transformer"

    def __init__(
        self,
        pretrain_img_size=224,
        in_channels=3,
        embed_dims=96,
        patch_size=4,
        window_size=7,
        mlp_ratio=4,
        depths=(2, 2, 6, 2),
        num_heads=(3, 6, 12, 24),
        strides=(4, 2, 2, 2),
        out_indices=(0, 1, 2, 3),
        qkv_bias=True,
        qk_scale=None,
        patch_norm=True,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        drop_path_rate=0.0,  # NOTE: default changed from the SOLIDER implementation
        use_abs_pos_embed=False,
        act_cfg=dict(type="GELU"),
        norm_cfg=dict(type="LN"),
        with_cp=False,
        pretrained=None,
        convert_weights=False,
        frozen_stages=-1,
        init_cfg=None,
        semantic_weight=0.5,  # NOTE: default changed from the SOLIDER implementation
        name="solider_small",
        **kwargs,
    ):
        # Validate with an explicit exception: an ``assert`` would be removed
        # when Python runs with -O, turning a bad name into a bare KeyError.
        if name not in BACKBONE_NAME2WIDTH:
            raise ValueError(
                f"Unknown backbone name {name!r}; "
                f"expected one of {sorted(BACKBONE_NAME2WIDTH)}"
            )

        self.pretrain_img_size = pretrain_img_size
        self.in_channels = in_channels
        self.embed_dims = embed_dims
        self.patch_size = patch_size
        self.window_size = window_size
        self.mlp_ratio = mlp_ratio
        self.depths = depths
        self.num_heads = num_heads
        self.strides = strides
        self.out_indices = out_indices
        self.qkv_bias = qkv_bias
        self.qk_scale = qk_scale
        self.patch_norm = patch_norm
        self.drop_rate = drop_rate
        self.attn_drop_rate = attn_drop_rate
        self.drop_path_rate = drop_path_rate
        self.use_abs_pos_embed = use_abs_pos_embed
        # The default dicts in the signature are created once and shared by
        # every call; copy them so mutating one config's ``act_cfg`` or
        # ``norm_cfg`` cannot leak into the defaults of later instances.
        self.act_cfg = dict(act_cfg) if isinstance(act_cfg, dict) else act_cfg
        self.norm_cfg = dict(norm_cfg) if isinstance(norm_cfg, dict) else norm_cfg
        self.with_cp = with_cp
        self.pretrained = pretrained
        self.convert_weights = convert_weights
        self.frozen_stages = frozen_stages
        self.init_cfg = init_cfg
        self.semantic_weight = semantic_weight

        # NOTE: The attributes below are provided for information only;
        # they are not intended to affect how the model is built.
        self.img_size = pretrain_img_size
        self.name = name
        self.vision_width = BACKBONE_NAME2WIDTH[self.name]
        self.hidden_size = self.embed_dims

        super().__init__(**kwargs)