# solider_small_224 / configuration_solider.py
# tuandunghcmut's picture
# Upload model
# 81dda37 verified
from transformers.configuration_utils import PretrainedConfig
# Lookup table from a supported backbone name to the width that
# SOLIDERConfig exposes as ``vision_width``.
#
# Each size variant is registered under two spellings, in this order:
#   swin_{tiny,small,base}_patch4_window7_224, then solider_{tiny,small,base}
# with tiny/small mapped to 768 and base mapped to 1024.
BACKBONE_NAME2WIDTH = {
    name_template.format(size): width
    for name_template in ("swin_{}_patch4_window7_224", "solider_{}")
    for size, width in (("tiny", 768), ("small", 768), ("base", 1024))
}
class SOLIDERConfig(PretrainedConfig):
    """Configuration for a SOLIDER backbone (``model_type = "swin_transformer"``).

    Every constructor argument other than ``name`` and ``**kwargs`` is stored
    on the instance under an attribute of the same name. The meaning of the
    individual architecture arguments is defined by the model code that
    consumes this config, which is not part of this module.

    In addition, a few convenience attributes are derived:

    * ``img_size``     -- copy of ``pretrain_img_size``.
    * ``name``         -- the backbone name; must be a key of
      ``BACKBONE_NAME2WIDTH``.
    * ``vision_width`` -- ``BACKBONE_NAME2WIDTH[name]``.
    * ``hidden_size``  -- copy of ``embed_dims``.

    Per the original author's note, these derived attributes are
    informational only and do not influence how the model is built.

    Args:
        act_cfg: Activation config. A ``dict`` value is shallow-copied so the
            instance never shares state with the caller or with the default.
        norm_cfg: Normalization config. Copied the same way as ``act_cfg``.
        name: Backbone name used to look up ``vision_width``.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.

    Raises:
        ValueError: If ``name`` is not a key of ``BACKBONE_NAME2WIDTH``.
    """

    model_type = "swin_transformer"

    def __init__(
        self,
        pretrain_img_size=224,
        in_channels=3,
        embed_dims=96,
        patch_size=4,
        window_size=7,
        mlp_ratio=4,
        depths=(2, 2, 6, 2),
        num_heads=(3, 6, 12, 24),
        strides=(4, 2, 2, 2),
        out_indices=(0, 1, 2, 3),
        qkv_bias=True,
        qk_scale=None,
        patch_norm=True,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        drop_path_rate=0.0,  # NOTE: modified from the SOLIDER implementation.
        use_abs_pos_embed=False,
        act_cfg=dict(type="GELU"),
        norm_cfg=dict(type="LN"),
        with_cp=False,
        pretrained=None,
        convert_weights=False,
        frozen_stages=-1,
        init_cfg=None,
        semantic_weight=0.5,  # NOTE: modified from the SOLIDER implementation.
        name="solider_small",
        **kwargs,
    ):
        # Validate with an explicit exception: an ``assert`` is removed under
        # ``python -O``, which would turn a bad name into a bare KeyError below.
        if name not in BACKBONE_NAME2WIDTH:
            raise ValueError(
                f"Unknown backbone name {name!r}; expected one of "
                f"{sorted(BACKBONE_NAME2WIDTH)}."
            )

        self.pretrain_img_size = pretrain_img_size
        self.in_channels = in_channels
        self.embed_dims = embed_dims
        self.patch_size = patch_size
        self.window_size = window_size
        self.mlp_ratio = mlp_ratio
        self.depths = depths
        self.num_heads = num_heads
        self.strides = strides
        self.out_indices = out_indices
        self.qkv_bias = qkv_bias
        self.qk_scale = qk_scale
        self.patch_norm = patch_norm
        self.drop_rate = drop_rate
        self.attn_drop_rate = attn_drop_rate
        self.drop_path_rate = drop_path_rate
        self.use_abs_pos_embed = use_abs_pos_embed
        # The default dicts in the signature are created once, at function
        # definition time. Copy them so that mutating one config's
        # ``act_cfg`` / ``norm_cfg`` cannot leak into other instances.
        # Non-dict values (e.g. None) are stored as given.
        self.act_cfg = dict(act_cfg) if isinstance(act_cfg, dict) else act_cfg
        self.norm_cfg = dict(norm_cfg) if isinstance(norm_cfg, dict) else norm_cfg
        self.with_cp = with_cp
        self.pretrained = pretrained
        self.convert_weights = convert_weights
        self.frozen_stages = frozen_stages
        self.init_cfg = init_cfg
        self.semantic_weight = semantic_weight

        # NOTE: The attributes below only provide information;
        # they have no effect on how the model is built.
        self.img_size = pretrain_img_size
        self.name = name
        self.vision_width = BACKBONE_NAME2WIDTH[self.name]
        self.hidden_size = self.embed_dims

        super().__init__(**kwargs)