from transformers.configuration_utils import PretrainedConfig

# Output feature width (channel dimension of the last stage) for each
# supported backbone variant.
BACKBONE_NAME2WIDTH = {
    "swin_tiny_patch4_window7_224": 768,
    "swin_small_patch4_window7_224": 768,
    "swin_base_patch4_window7_224": 1024,
    "solider_tiny": 768,
    "solider_small": 768,
    "solider_base": 1024,
}


class SOLIDERConfig(PretrainedConfig):
    """Configuration for the SOLIDER Swin Transformer backbone.

    Stores the Swin architecture hyper-parameters together with the
    SOLIDER-specific ``semantic_weight`` and the backbone ``name`` used to
    look up the output feature width in ``BACKBONE_NAME2WIDTH``.
    """

    model_type = "swin_transformer"

    def __init__(
        self,
        pretrain_img_size=224,
        in_channels=3,
        embed_dims=96,
        patch_size=4,
        window_size=7,
        mlp_ratio=4,
        depths=(2, 2, 6, 2),
        num_heads=(3, 6, 12, 24),
        strides=(4, 2, 2, 2),
        out_indices=(0, 1, 2, 3),
        qkv_bias=True,
        qk_scale=None,
        patch_norm=True,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        drop_path_rate=0.0,
        use_abs_pos_embed=False,
        act_cfg=dict(type="GELU"),
        norm_cfg=dict(type="LN"),
        with_cp=False,
        pretrained=None,
        convert_weights=False,
        frozen_stages=-1,
        init_cfg=None,
        semantic_weight=0.5,
        name="solider_small",
        **kwargs,
    ):
        # Swin backbone architecture hyper-parameters.
        self.pretrain_img_size = pretrain_img_size
        self.in_channels = in_channels
        self.embed_dims = embed_dims
        self.patch_size = patch_size
        self.window_size = window_size
        self.mlp_ratio = mlp_ratio
        self.depths = depths
        self.num_heads = num_heads
        self.strides = strides
        self.out_indices = out_indices
        self.qkv_bias = qkv_bias
        self.qk_scale = qk_scale
        self.patch_norm = patch_norm
        self.drop_rate = drop_rate
        self.attn_drop_rate = attn_drop_rate
        self.drop_path_rate = drop_path_rate
        self.use_abs_pos_embed = use_abs_pos_embed
        self.act_cfg = act_cfg
        self.norm_cfg = norm_cfg

        # Training / initialization options.
        self.with_cp = with_cp
        self.pretrained = pretrained
        self.convert_weights = convert_weights
        self.frozen_stages = frozen_stages
        self.init_cfg = init_cfg

        # SOLIDER-specific semantic weight.
        self.semantic_weight = semantic_weight

        # Derived fields.
        self.img_size = pretrain_img_size
        if name not in BACKBONE_NAME2WIDTH:
            raise ValueError(
                f"Unknown backbone name {name!r}; expected one of "
                f"{sorted(BACKBONE_NAME2WIDTH)}"
            )
        self.name = name
        self.vision_width = BACKBONE_NAME2WIDTH[self.name]
        # Expose the patch embedding width under the conventional name as well.
        self.hidden_size = self.embed_dims

        super().__init__(**kwargs)
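

if __name__ == "__main__":
    # Minimal usage sketch (illustrative only, not part of the library API):
    # build a config for one of the variants listed in BACKBONE_NAME2WIDTH and
    # inspect the derived fields. The hyper-parameter values below are example
    # choices, not recommended settings.
    cfg = SOLIDERConfig(name="solider_base", embed_dims=128, semantic_weight=0.8)
    print(cfg.vision_width)  # 1024, looked up from BACKBONE_NAME2WIDTH
    print(cfg.hidden_size)   # mirrors embed_dims (128 here)
    print(cfg.model_type)    # "swin_transformer"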