"""EfficientDet Configurations Adapted from official impl at https://github.com/google/automl/tree/master/efficientdet TODO use a different config system (OmegaConfig -> Hydra?), separate model from train specific hparams """ from omegaconf import OmegaConf from copy import deepcopy def default_detection_model_configs(): """Returns a default detection configs.""" h = OmegaConf.create() # model name. h.name = 'tf_efficientdet_d1' h.backbone_name = 'tf_efficientnet_b1' h.backbone_args = None # FIXME sort out kwargs vs config for backbone creation # model specific, input preprocessing parameters h.image_size = (640, 640) # dataset specific head parameters h.num_classes = 90 # feature + anchor config h.min_level = 3 h.max_level = 7 h.num_levels = h.max_level - h.min_level + 1 h.num_scales = 3 h.aspect_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)] # ratio w/h: 2.0 means w=1.4, h=0.7. Can be computed with k-mean per dataset. #h.aspect_ratios = [1.0, 2.0, 0.5] h.anchor_scale = 4.0 # FPN and head config h.pad_type = 'same' # original TF models require an equivalent of Tensorflow 'SAME' padding h.act_type = 'swish' h.norm_layer = None # defaults to batch norm when None h.norm_kwargs = dict(eps=.001, momentum=.01) h.box_class_repeats = 3 h.fpn_cell_repeats = 3 h.fpn_channels = 88 h.separable_conv = True h.apply_bn_for_resampling = True h.conv_after_downsample = False h.conv_bn_relu_pattern = False h.use_native_resize_op = False h.pooling_type = None h.redundant_bias = True # original TF models have back to back bias + BN layers, not necessary! h.head_bn_level_first = False # change order of BN in head repeat list of lists, True for torchscript compat h.fpn_name = None h.fpn_config = None h.fpn_drop_path_rate = 0. # No stochastic depth in default. NOTE not currently used, unstable training # classification loss (used by train bench) h.alpha = 0.25 h.gamma = 1.5 h.label_smoothing = 0. # only supported if new_focal == True h.new_focal = False # use new focal loss (supports label smoothing but uses more mem, less optimal w/ jit script) h.jit_loss = False # torchscript jit for loss fn speed improvement, can impact stability and/or increase mem usage # localization loss (used by train bench) h.delta = 0.1 h.box_loss_weight = 50.0 return h efficientdet_model_param_dict = dict( # Models with PyTorch friendly padding and my PyTorch pretrained backbones, training TBD efficientdet_d0=dict( name='efficientdet_d0', backbone_name='efficientnet_b0', image_size=(512, 512), fpn_channels=64, fpn_cell_repeats=3, box_class_repeats=3, pad_type='', redundant_bias=False, backbone_args=dict(drop_path_rate=0.1), url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/efficientdet_d0-f3276ba8.pth', ), efficientdet_d1=dict( name='efficientdet_d1', backbone_name='efficientnet_b1', image_size=(640, 640), fpn_channels=88, fpn_cell_repeats=4, box_class_repeats=3, pad_type='', redundant_bias=False, backbone_args=dict(drop_path_rate=0.2), url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/efficientdet_d1-bb7e98fe.pth', ), efficientdet_d2=dict( name='efficientdet_d2', backbone_name='efficientnet_b2', image_size=(768, 768), fpn_channels=112, fpn_cell_repeats=5, box_class_repeats=3, pad_type='', redundant_bias=False, backbone_args=dict(drop_path_rate=0.2), url='', # no pretrained weights yet ), efficientdet_d3=dict( name='efficientdet_d3', backbone_name='efficientnet_b3', image_size=(896, 896), fpn_channels=160, fpn_cell_repeats=6, box_class_repeats=4, pad_type='', redundant_bias=False, backbone_args=dict(drop_path_rate=0.2), url='', # no pretrained weights yet ), efficientdet_d4=dict( name='efficientdet_d4', backbone_name='efficientnet_b4', image_size=(1024, 1024), fpn_channels=224, fpn_cell_repeats=7, box_class_repeats=4, backbone_args=dict(drop_path_rate=0.2), ), efficientdet_d5=dict( name='efficientdet_d5', backbone_name='efficientnet_b5', image_size=(1280, 1280), fpn_channels=288, fpn_cell_repeats=7, box_class_repeats=4, backbone_args=dict(drop_path_rate=0.2), url='', ), # My own experimental configs with alternate models, training TBD # Note: any 'timm' model in the EfficientDet family can be used as a backbone here. resdet50=dict( name='resdet50', backbone_name='resnet50', image_size=(640, 640), fpn_channels=88, fpn_cell_repeats=4, box_class_repeats=3, pad_type='', act_type='relu', redundant_bias=False, separable_conv=False, backbone_args=dict(drop_path_rate=0.2), url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/resdet50_416-08676892.pth', ), cspresdet50=dict( name='cspresdet50', backbone_name='cspresnet50', image_size=(640, 640), aspect_ratios=[1.0, 2.0, 0.5], fpn_channels=88, fpn_cell_repeats=4, box_class_repeats=3, pad_type='', act_type='leaky_relu', redundant_bias=False, separable_conv=False, head_bn_level_first=True, backbone_args=dict(drop_path_rate=0.2), url='', ), cspresdext50=dict( name='cspresdext50', backbone_name='cspresnext50', image_size=(640, 640), aspect_ratios=[1.0, 2.0, 0.5], fpn_channels=88, fpn_cell_repeats=4, box_class_repeats=3, pad_type='', act_type='leaky_relu', redundant_bias=False, separable_conv=False, head_bn_level_first=True, backbone_args=dict(drop_path_rate=0.2), url='', ), cspresdext50pan=dict( name='cspresdext50pan', backbone_name='cspresnext50', image_size=(640, 640), aspect_ratios=[1.0, 2.0, 0.5], fpn_channels=88, fpn_cell_repeats=3, box_class_repeats=3, pad_type='', act_type='leaky_relu', fpn_name='pan_fa', # PAN FPN experiment redundant_bias=False, separable_conv=False, head_bn_level_first=True, backbone_args=dict(drop_path_rate=0.2), url='', ), cspdarkdet53=dict( name='cspdarkdet53', backbone_name='cspdarknet53', image_size=(640, 640), aspect_ratios=[1.0, 2.0, 0.5], fpn_channels=88, fpn_cell_repeats=4, box_class_repeats=3, pad_type='', act_type='leaky_relu', redundant_bias=False, separable_conv=False, head_bn_level_first=True, backbone_args=dict(drop_path_rate=0.2), url='', ), mixdet_m=dict( name='mixdet_m', backbone_name='mixnet_m', image_size=(512, 512), aspect_ratios=[1.0, 2.0, 0.5], fpn_channels=64, fpn_cell_repeats=3, box_class_repeats=3, pad_type='', redundant_bias=False, head_bn_level_first=True, backbone_args=dict(drop_path_rate=0.1), url='', # no pretrained weights yet ), mixdet_l=dict( name='mixdet_l', backbone_name='mixnet_l', image_size=(640, 640), aspect_ratios=[1.0, 2.0, 0.5], fpn_channels=88, fpn_cell_repeats=4, box_class_repeats=3, pad_type='', redundant_bias=False, head_bn_level_first=True, backbone_args=dict(drop_path_rate=0.2), url='', # no pretrained weights yet ), mobiledetv2_110d=dict( name='mobiledetv2_110d', backbone_name='mobilenetv2_110d', image_size=(384, 384), aspect_ratios=[1.0, 2.0, 0.5], fpn_channels=48, fpn_cell_repeats=3, box_class_repeats=3, pad_type='', act_type='relu6', redundant_bias=False, head_bn_level_first=True, backbone_args=dict(drop_path_rate=0.05), url='', # no pretrained weights yet ), mobiledetv2_120d=dict( name='mobiledetv2_120d', backbone_name='mobilenetv2_120d', image_size=(512, 512), aspect_ratios=[1.0, 2.0, 0.5], fpn_channels=56, fpn_cell_repeats=3, box_class_repeats=3, pad_type='', act_type='relu6', redundant_bias=False, head_bn_level_first=True, backbone_args=dict(drop_path_rate=0.1), url='', # no pretrained weights yet ), mobiledetv3_large=dict( name='mobiledetv3_large', backbone_name='mobilenetv3_large_100', image_size=(512, 512), aspect_ratios=[1.0, 2.0, 0.5], fpn_channels=64, fpn_cell_repeats=3, box_class_repeats=3, pad_type='', act_type='hard_swish', redundant_bias=False, head_bn_level_first=True, backbone_args=dict(drop_path_rate=0.1), url='', # no pretrained weights yet ), efficientdet_q0=dict( name='efficientdet_q0', backbone_name='efficientnet_b0', image_size=(512, 512), fpn_channels=64, fpn_cell_repeats=3, box_class_repeats=3, pad_type='', fpn_name='qufpn_fa', # quad-fpn + fast attn experiment redundant_bias=False, head_bn_level_first=True, backbone_args=dict(drop_path_rate=0.1), url='', ), efficientdet_w0=dict( name='efficientdet_w0', # 'wide' backbone_name='efficientnet_b0', image_size=(512, 512), aspect_ratios=[1.0, 2.0, 0.5], fpn_channels=80, fpn_cell_repeats=3, box_class_repeats=3, pad_type='', redundant_bias=False, head_bn_level_first=True, backbone_args=dict( drop_path_rate=0.1, feature_location='depthwise'), # features from after DW/SE in IR block url='', # no pretrained weights yet ), efficientdet_es=dict( name='efficientdet_es', #EdgeTPU-Small backbone_name='efficientnet_es', image_size=(512, 512), aspect_ratios=[1.0, 2.0, 0.5], fpn_channels=72, fpn_cell_repeats=3, box_class_repeats=3, pad_type='', act_type='relu', redundant_bias=False, head_bn_level_first=True, separable_conv=False, backbone_args=dict(drop_path_rate=0.1), url='', ), efficientdet_em=dict( name='efficientdet_em', # Edge-TPU Medium backbone_name='efficientnet_em', image_size=(640, 640), aspect_ratios=[1.0, 2.0, 0.5], fpn_channels=96, fpn_cell_repeats=4, box_class_repeats=3, pad_type='', act_type='relu', redundant_bias=False, head_bn_level_first=True, separable_conv=False, backbone_args=dict(drop_path_rate=0.2), url='', # no pretrained weights yet ), efficientdet_lite0=dict( name='efficientdet_lite0', backbone_name='efficientnet_lite0', image_size=(512, 512), fpn_channels=64, fpn_cell_repeats=3, box_class_repeats=3, act_type='relu', redundant_bias=False, head_bn_level_first=True, backbone_args=dict(drop_path_rate=0.1), url='', ), # Models ported from Tensorflow with pretrained backbones ported from Tensorflow tf_efficientdet_d0=dict( name='tf_efficientdet_d0', backbone_name='tf_efficientnet_b0', image_size=(512, 512), fpn_channels=64, fpn_cell_repeats=3, box_class_repeats=3, backbone_args=dict(drop_path_rate=0.2), url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d0_34-f153e0cf.pth', ), tf_efficientdet_d1=dict( name='tf_efficientdet_d1', backbone_name='tf_efficientnet_b1', image_size=(640, 640), fpn_channels=88, fpn_cell_repeats=4, box_class_repeats=3, backbone_args=dict(drop_path_rate=0.2), url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d1_40-a30f94af.pth' ), tf_efficientdet_d2=dict( name='tf_efficientdet_d2', backbone_name='tf_efficientnet_b2', image_size=(768, 768), fpn_channels=112, fpn_cell_repeats=5, box_class_repeats=3, backbone_args=dict(drop_path_rate=0.2), url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d2_43-8107aa99.pth', ), tf_efficientdet_d3=dict( name='tf_efficientdet_d3', backbone_name='tf_efficientnet_b3', image_size=(896, 896), fpn_channels=160, fpn_cell_repeats=6, box_class_repeats=4, backbone_args=dict(drop_path_rate=0.2), url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d3_47-0b525f35.pth', ), tf_efficientdet_d4=dict( name='tf_efficientdet_d4', backbone_name='tf_efficientnet_b4', image_size=(1024, 1024), fpn_channels=224, fpn_cell_repeats=7, box_class_repeats=4, backbone_args=dict(drop_path_rate=0.2), url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d4_49-f56376d9.pth', ), tf_efficientdet_d5=dict( name='tf_efficientdet_d5', backbone_name='tf_efficientnet_b5', image_size=(1280, 1280), fpn_channels=288, fpn_cell_repeats=7, box_class_repeats=4, backbone_args=dict(drop_path_rate=0.2), url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d5_51-c79f9be6.pth', ), tf_efficientdet_d6=dict( name='tf_efficientdet_d6', backbone_name='tf_efficientnet_b6', image_size=(1280, 1280), fpn_channels=384, fpn_cell_repeats=8, box_class_repeats=5, fpn_name='bifpn_sum', # Use unweighted sum for training stability. backbone_args=dict(drop_path_rate=0.2), url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d6_52-4eda3773.pth' ), tf_efficientdet_d7=dict( name='tf_efficientdet_d7', backbone_name='tf_efficientnet_b6', image_size=(1536, 1536), fpn_channels=384, fpn_cell_repeats=8, box_class_repeats=5, anchor_scale=5.0, fpn_name='bifpn_sum', # Use unweighted sum for training stability. backbone_args=dict(drop_path_rate=0.2), url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d7_53-6d1d7a95.pth' ), tf_efficientdet_d7x=dict( name='tf_efficientdet_d7x', backbone_name='tf_efficientnet_b7', image_size=(1536, 1536), fpn_channels=384, fpn_cell_repeats=8, box_class_repeats=5, anchor_scale=4.0, max_level=8, fpn_name='bifpn_sum', # Use unweighted sum for training stability. backbone_args=dict(drop_path_rate=0.2), url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d7x-f390b87c.pth' ), # The lite configs are in TF automl repository but no weights yet and listed as 'not final' tf_efficientdet_lite0=dict( name='tf_efficientdet_lite0', backbone_name='tf_efficientnet_lite0', image_size=(512, 512), fpn_channels=64, fpn_cell_repeats=3, box_class_repeats=3, act_type='relu', redundant_bias=False, backbone_args=dict(drop_path_rate=0.1), # unlike other tf_ models, this was not ported from tf automl impl, but trained from tf pretrained efficient lite # weights using this code, will likely replace if/when official det-lite weights are released url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_lite0-f5f303a9.pth', ), tf_efficientdet_lite1=dict( name='tf_efficientdet_lite1', backbone_name='tf_efficientnet_lite1', image_size=(640, 640), fpn_channels=88, fpn_cell_repeats=4, box_class_repeats=3, act_type='relu', backbone_args=dict(drop_path_rate=0.2), url='', # no pretrained weights yet ), tf_efficientdet_lite2=dict( name='tf_efficientdet_lite2', backbone_name='tf_efficientnet_lite2', image_size=(768, 768), fpn_channels=112, fpn_cell_repeats=5, box_class_repeats=3, act_type='relu', backbone_args=dict(drop_path_rate=0.2), url='', ), tf_efficientdet_lite3=dict( name='tf_efficientdet_lite3', backbone_name='tf_efficientnet_lite3', image_size=(896, 896), fpn_channels=160, fpn_cell_repeats=6, box_class_repeats=4, act_type='relu', backbone_args=dict(drop_path_rate=0.2), url='', ), tf_efficientdet_lite4=dict( name='tf_efficientdet_lite4', backbone_name='tf_efficientnet_lite4', image_size=(1024, 1024), fpn_channels=224, fpn_cell_repeats=7, box_class_repeats=4, act_type='relu', backbone_args=dict(drop_path_rate=0.2), url='', ), ) def get_efficientdet_config(model_name='tf_efficientdet_d1'): """Get the default config for EfficientDet based on model name.""" h = default_detection_model_configs() h.update(efficientdet_model_param_dict[model_name]) h.num_levels = h.max_level - h.min_level + 1 return deepcopy(h) # may be unnecessary, ensure no references to param dict values