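# NOTE(review): the six lines below look like web-page residue rather than
# module code; kept as comments so the module can be parsed. Confirm and drop.
# santit96's picture
# Create the streamlit app that classifies the trash in an image into classes
# fa84113
# raw
# history blame
# 18.3 kB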
"""EfficientDet Configurations
Adapted from official impl at https://github.com/google/automl/tree/master/efficientdet
TODO use a different config system (OmegaConf -> Hydra?), separate model from train specific hparams
"""
from omegaconf import OmegaConf
from copy import deepcopy
def default_detection_model_configs():
    """Build the baseline detection config.

    Every value set here is a default; `get_efficientdet_config` overlays the
    per-model entries of `efficientdet_model_param_dict` on top of it.

    Returns:
        The config object created by `OmegaConf.create()`, populated with the
        model, anchor, FPN/head and loss settings assigned below.
    """
    cfg = OmegaConf.create()

    # --- model identity and backbone ---
    cfg.name = 'tf_efficientdet_d1'
    cfg.backbone_name = 'tf_efficientnet_b1'
    cfg.backbone_args = None  # FIXME sort out kwargs vs config for backbone creation

    # --- input preprocessing (model specific) ---
    cfg.image_size = (640, 640)

    # --- head size (dataset specific) ---
    cfg.num_classes = 90

    # --- feature levels and anchors ---
    cfg.min_level = 3
    cfg.max_level = 7
    cfg.num_levels = cfg.max_level - cfg.min_level + 1
    cfg.num_scales = 3
    cfg.aspect_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    # Scalar alternative, expressed as ratio w/h: 2.0 means w=1.4, h=0.7.
    # Can be computed with k-mean per dataset. Equivalent form: [1.0, 2.0, 0.5]
    cfg.anchor_scale = 4.0

    # --- FPN and head ---
    cfg.pad_type = 'same'  # original TF models require an equivalent of Tensorflow 'SAME' padding
    cfg.act_type = 'swish'
    cfg.norm_layer = None  # None falls back to batch norm
    cfg.norm_kwargs = dict(eps=0.001, momentum=0.01)
    cfg.box_class_repeats = 3
    cfg.fpn_cell_repeats = 3
    cfg.fpn_channels = 88
    cfg.separable_conv = True
    cfg.apply_bn_for_resampling = True
    cfg.conv_after_downsample = False
    cfg.conv_bn_relu_pattern = False
    cfg.use_native_resize_op = False
    cfg.pooling_type = None
    cfg.redundant_bias = True  # original TF models have back to back bias + BN layers, not necessary!
    cfg.head_bn_level_first = False  # change order of BN in head repeat list of lists, True for torchscript compat
    cfg.fpn_name = None
    cfg.fpn_config = None
    cfg.fpn_drop_path_rate = 0.0  # no stochastic depth by default. NOTE not currently used, unstable training

    # --- classification loss (used by train bench) ---
    cfg.alpha = 0.25
    cfg.gamma = 1.5
    cfg.label_smoothing = 0.0  # only supported if new_focal == True
    cfg.new_focal = False  # use new focal loss (supports label smoothing but uses more mem, less optimal w/ jit script)
    cfg.jit_loss = False  # torchscript jit for loss fn speed improvement, can impact stability and/or increase mem usage

    # --- localization loss (used by train bench) ---
    cfg.delta = 0.1
    cfg.box_loss_weight = 50.0

    return cfg
# Per-model overrides, keyed by model name. `get_efficientdet_config` overlays
# the selected entry on the defaults from `default_detection_model_configs`,
# so an entry only lists the settings that differ from (or restate) a default.
#
# Every entry carries:
#   name          -- same string as its key
#   backbone_name -- name of the backbone model to build
#   url           -- pretrained weights URL; '' where no weights are listed
efficientdet_model_param_dict = dict(
    # Models with PyTorch friendly padding and my PyTorch pretrained backbones, training TBD
    efficientdet_d0=dict(
        name='efficientdet_d0',
        backbone_name='efficientnet_b0',
        image_size=(512, 512),
        fpn_channels=64,
        fpn_cell_repeats=3,
        box_class_repeats=3,
        pad_type='',
        redundant_bias=False,
        backbone_args=dict(drop_path_rate=0.1),
        url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/efficientdet_d0-f3276ba8.pth',
    ),
    efficientdet_d1=dict(
        name='efficientdet_d1',
        backbone_name='efficientnet_b1',
        image_size=(640, 640),
        fpn_channels=88,
        fpn_cell_repeats=4,
        box_class_repeats=3,
        pad_type='',
        redundant_bias=False,
        backbone_args=dict(drop_path_rate=0.2),
        url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/efficientdet_d1-bb7e98fe.pth',
    ),
    efficientdet_d2=dict(
        name='efficientdet_d2',
        backbone_name='efficientnet_b2',
        image_size=(768, 768),
        fpn_channels=112,
        fpn_cell_repeats=5,
        box_class_repeats=3,
        pad_type='',
        redundant_bias=False,
        backbone_args=dict(drop_path_rate=0.2),
        url='',  # no pretrained weights yet
    ),
    efficientdet_d3=dict(
        name='efficientdet_d3',
        backbone_name='efficientnet_b3',
        image_size=(896, 896),
        fpn_channels=160,
        fpn_cell_repeats=6,
        box_class_repeats=4,
        pad_type='',
        redundant_bias=False,
        backbone_args=dict(drop_path_rate=0.2),
        url='',  # no pretrained weights yet
    ),
    efficientdet_d4=dict(
        name='efficientdet_d4',
        backbone_name='efficientnet_b4',
        image_size=(1024, 1024),
        fpn_channels=224,
        fpn_cell_repeats=7,
        box_class_repeats=4,
        backbone_args=dict(drop_path_rate=0.2),
        url='',  # no weights URL listed; empty so `url` is defined for every model
    ),
    efficientdet_d5=dict(
        name='efficientdet_d5',
        backbone_name='efficientnet_b5',
        image_size=(1280, 1280),
        fpn_channels=288,
        fpn_cell_repeats=7,
        box_class_repeats=4,
        backbone_args=dict(drop_path_rate=0.2),
        url='',
    ),

    # My own experimental configs with alternate models, training TBD
    # Note: any 'timm' model in the EfficientDet family can be used as a backbone here.
    resdet50=dict(
        name='resdet50',
        backbone_name='resnet50',
        image_size=(640, 640),
        fpn_channels=88,
        fpn_cell_repeats=4,
        box_class_repeats=3,
        pad_type='',
        act_type='relu',
        redundant_bias=False,
        separable_conv=False,
        backbone_args=dict(drop_path_rate=0.2),
        url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/resdet50_416-08676892.pth',
    ),
    cspresdet50=dict(
        name='cspresdet50',
        backbone_name='cspresnet50',
        image_size=(640, 640),
        aspect_ratios=[1.0, 2.0, 0.5],
        fpn_channels=88,
        fpn_cell_repeats=4,
        box_class_repeats=3,
        pad_type='',
        act_type='leaky_relu',
        redundant_bias=False,
        separable_conv=False,
        head_bn_level_first=True,
        backbone_args=dict(drop_path_rate=0.2),
        url='',
    ),
    cspresdext50=dict(
        name='cspresdext50',
        backbone_name='cspresnext50',
        image_size=(640, 640),
        aspect_ratios=[1.0, 2.0, 0.5],
        fpn_channels=88,
        fpn_cell_repeats=4,
        box_class_repeats=3,
        pad_type='',
        act_type='leaky_relu',
        redundant_bias=False,
        separable_conv=False,
        head_bn_level_first=True,
        backbone_args=dict(drop_path_rate=0.2),
        url='',
    ),
    cspresdext50pan=dict(
        name='cspresdext50pan',
        backbone_name='cspresnext50',
        image_size=(640, 640),
        aspect_ratios=[1.0, 2.0, 0.5],
        fpn_channels=88,
        fpn_cell_repeats=3,
        box_class_repeats=3,
        pad_type='',
        act_type='leaky_relu',
        fpn_name='pan_fa',  # PAN FPN experiment
        redundant_bias=False,
        separable_conv=False,
        head_bn_level_first=True,
        backbone_args=dict(drop_path_rate=0.2),
        url='',
    ),
    cspdarkdet53=dict(
        name='cspdarkdet53',
        backbone_name='cspdarknet53',
        image_size=(640, 640),
        aspect_ratios=[1.0, 2.0, 0.5],
        fpn_channels=88,
        fpn_cell_repeats=4,
        box_class_repeats=3,
        pad_type='',
        act_type='leaky_relu',
        redundant_bias=False,
        separable_conv=False,
        head_bn_level_first=True,
        backbone_args=dict(drop_path_rate=0.2),
        url='',
    ),
    mixdet_m=dict(
        name='mixdet_m',
        backbone_name='mixnet_m',
        image_size=(512, 512),
        aspect_ratios=[1.0, 2.0, 0.5],
        fpn_channels=64,
        fpn_cell_repeats=3,
        box_class_repeats=3,
        pad_type='',
        redundant_bias=False,
        head_bn_level_first=True,
        backbone_args=dict(drop_path_rate=0.1),
        url='',  # no pretrained weights yet
    ),
    mixdet_l=dict(
        name='mixdet_l',
        backbone_name='mixnet_l',
        image_size=(640, 640),
        aspect_ratios=[1.0, 2.0, 0.5],
        fpn_channels=88,
        fpn_cell_repeats=4,
        box_class_repeats=3,
        pad_type='',
        redundant_bias=False,
        head_bn_level_first=True,
        backbone_args=dict(drop_path_rate=0.2),
        url='',  # no pretrained weights yet
    ),
    mobiledetv2_110d=dict(
        name='mobiledetv2_110d',
        backbone_name='mobilenetv2_110d',
        image_size=(384, 384),
        aspect_ratios=[1.0, 2.0, 0.5],
        fpn_channels=48,
        fpn_cell_repeats=3,
        box_class_repeats=3,
        pad_type='',
        act_type='relu6',
        redundant_bias=False,
        head_bn_level_first=True,
        backbone_args=dict(drop_path_rate=0.05),
        url='',  # no pretrained weights yet
    ),
    mobiledetv2_120d=dict(
        name='mobiledetv2_120d',
        backbone_name='mobilenetv2_120d',
        image_size=(512, 512),
        aspect_ratios=[1.0, 2.0, 0.5],
        fpn_channels=56,
        fpn_cell_repeats=3,
        box_class_repeats=3,
        pad_type='',
        act_type='relu6',
        redundant_bias=False,
        head_bn_level_first=True,
        backbone_args=dict(drop_path_rate=0.1),
        url='',  # no pretrained weights yet
    ),
    mobiledetv3_large=dict(
        name='mobiledetv3_large',
        backbone_name='mobilenetv3_large_100',
        image_size=(512, 512),
        aspect_ratios=[1.0, 2.0, 0.5],
        fpn_channels=64,
        fpn_cell_repeats=3,
        box_class_repeats=3,
        pad_type='',
        act_type='hard_swish',
        redundant_bias=False,
        head_bn_level_first=True,
        backbone_args=dict(drop_path_rate=0.1),
        url='',  # no pretrained weights yet
    ),
    efficientdet_q0=dict(
        name='efficientdet_q0',
        backbone_name='efficientnet_b0',
        image_size=(512, 512),
        fpn_channels=64,
        fpn_cell_repeats=3,
        box_class_repeats=3,
        pad_type='',
        fpn_name='qufpn_fa',  # quad-fpn + fast attn experiment
        redundant_bias=False,
        head_bn_level_first=True,
        backbone_args=dict(drop_path_rate=0.1),
        url='',
    ),
    efficientdet_w0=dict(
        name='efficientdet_w0',  # 'wide'
        backbone_name='efficientnet_b0',
        image_size=(512, 512),
        aspect_ratios=[1.0, 2.0, 0.5],
        fpn_channels=80,
        fpn_cell_repeats=3,
        box_class_repeats=3,
        pad_type='',
        redundant_bias=False,
        head_bn_level_first=True,
        backbone_args=dict(
            drop_path_rate=0.1,
            feature_location='depthwise'),  # features from after DW/SE in IR block
        url='',  # no pretrained weights yet
    ),
    efficientdet_es=dict(
        name='efficientdet_es',  # EdgeTPU-Small
        backbone_name='efficientnet_es',
        image_size=(512, 512),
        aspect_ratios=[1.0, 2.0, 0.5],
        fpn_channels=72,
        fpn_cell_repeats=3,
        box_class_repeats=3,
        pad_type='',
        act_type='relu',
        redundant_bias=False,
        head_bn_level_first=True,
        separable_conv=False,
        backbone_args=dict(drop_path_rate=0.1),
        url='',
    ),
    efficientdet_em=dict(
        name='efficientdet_em',  # Edge-TPU Medium
        backbone_name='efficientnet_em',
        image_size=(640, 640),
        aspect_ratios=[1.0, 2.0, 0.5],
        fpn_channels=96,
        fpn_cell_repeats=4,
        box_class_repeats=3,
        pad_type='',
        act_type='relu',
        redundant_bias=False,
        head_bn_level_first=True,
        separable_conv=False,
        backbone_args=dict(drop_path_rate=0.2),
        url='',  # no pretrained weights yet
    ),
    efficientdet_lite0=dict(
        name='efficientdet_lite0',
        backbone_name='efficientnet_lite0',
        image_size=(512, 512),
        fpn_channels=64,
        fpn_cell_repeats=3,
        box_class_repeats=3,
        act_type='relu',
        redundant_bias=False,
        head_bn_level_first=True,
        backbone_args=dict(drop_path_rate=0.1),
        url='',
    ),

    # Models ported from Tensorflow with pretrained backbones ported from Tensorflow
    tf_efficientdet_d0=dict(
        name='tf_efficientdet_d0',
        backbone_name='tf_efficientnet_b0',
        image_size=(512, 512),
        fpn_channels=64,
        fpn_cell_repeats=3,
        box_class_repeats=3,
        backbone_args=dict(drop_path_rate=0.2),
        url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d0_34-f153e0cf.pth',
    ),
    tf_efficientdet_d1=dict(
        name='tf_efficientdet_d1',
        backbone_name='tf_efficientnet_b1',
        image_size=(640, 640),
        fpn_channels=88,
        fpn_cell_repeats=4,
        box_class_repeats=3,
        backbone_args=dict(drop_path_rate=0.2),
        url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d1_40-a30f94af.pth'
    ),
    tf_efficientdet_d2=dict(
        name='tf_efficientdet_d2',
        backbone_name='tf_efficientnet_b2',
        image_size=(768, 768),
        fpn_channels=112,
        fpn_cell_repeats=5,
        box_class_repeats=3,
        backbone_args=dict(drop_path_rate=0.2),
        url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d2_43-8107aa99.pth',
    ),
    tf_efficientdet_d3=dict(
        name='tf_efficientdet_d3',
        backbone_name='tf_efficientnet_b3',
        image_size=(896, 896),
        fpn_channels=160,
        fpn_cell_repeats=6,
        box_class_repeats=4,
        backbone_args=dict(drop_path_rate=0.2),
        url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d3_47-0b525f35.pth',
    ),
    tf_efficientdet_d4=dict(
        name='tf_efficientdet_d4',
        backbone_name='tf_efficientnet_b4',
        image_size=(1024, 1024),
        fpn_channels=224,
        fpn_cell_repeats=7,
        box_class_repeats=4,
        backbone_args=dict(drop_path_rate=0.2),
        url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d4_49-f56376d9.pth',
    ),
    tf_efficientdet_d5=dict(
        name='tf_efficientdet_d5',
        backbone_name='tf_efficientnet_b5',
        image_size=(1280, 1280),
        fpn_channels=288,
        fpn_cell_repeats=7,
        box_class_repeats=4,
        backbone_args=dict(drop_path_rate=0.2),
        url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d5_51-c79f9be6.pth',
    ),
    tf_efficientdet_d6=dict(
        name='tf_efficientdet_d6',
        backbone_name='tf_efficientnet_b6',
        image_size=(1280, 1280),
        fpn_channels=384,
        fpn_cell_repeats=8,
        box_class_repeats=5,
        fpn_name='bifpn_sum',  # Use unweighted sum for training stability.
        backbone_args=dict(drop_path_rate=0.2),
        url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d6_52-4eda3773.pth'
    ),
    tf_efficientdet_d7=dict(
        name='tf_efficientdet_d7',
        backbone_name='tf_efficientnet_b6',
        image_size=(1536, 1536),
        fpn_channels=384,
        fpn_cell_repeats=8,
        box_class_repeats=5,
        anchor_scale=5.0,
        fpn_name='bifpn_sum',  # Use unweighted sum for training stability.
        backbone_args=dict(drop_path_rate=0.2),
        url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d7_53-6d1d7a95.pth'
    ),
    tf_efficientdet_d7x=dict(
        name='tf_efficientdet_d7x',
        backbone_name='tf_efficientnet_b7',
        image_size=(1536, 1536),
        fpn_channels=384,
        fpn_cell_repeats=8,
        box_class_repeats=5,
        anchor_scale=4.0,
        max_level=8,
        fpn_name='bifpn_sum',  # Use unweighted sum for training stability.
        backbone_args=dict(drop_path_rate=0.2),
        url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d7x-f390b87c.pth'
    ),

    # The lite configs are in TF automl repository but no weights yet and listed as 'not final'
    tf_efficientdet_lite0=dict(
        name='tf_efficientdet_lite0',
        backbone_name='tf_efficientnet_lite0',
        image_size=(512, 512),
        fpn_channels=64,
        fpn_cell_repeats=3,
        box_class_repeats=3,
        act_type='relu',
        redundant_bias=False,
        backbone_args=dict(drop_path_rate=0.1),
        # unlike other tf_ models, this was not ported from tf automl impl, but trained from tf pretrained efficient lite
        # weights using this code, will likely replace if/when official det-lite weights are released
        url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_lite0-f5f303a9.pth',
    ),
    tf_efficientdet_lite1=dict(
        name='tf_efficientdet_lite1',
        backbone_name='tf_efficientnet_lite1',
        image_size=(640, 640),
        fpn_channels=88,
        fpn_cell_repeats=4,
        box_class_repeats=3,
        act_type='relu',
        backbone_args=dict(drop_path_rate=0.2),
        url='',  # no pretrained weights yet
    ),
    tf_efficientdet_lite2=dict(
        name='tf_efficientdet_lite2',
        backbone_name='tf_efficientnet_lite2',
        image_size=(768, 768),
        fpn_channels=112,
        fpn_cell_repeats=5,
        box_class_repeats=3,
        act_type='relu',
        backbone_args=dict(drop_path_rate=0.2),
        url='',
    ),
    tf_efficientdet_lite3=dict(
        name='tf_efficientdet_lite3',
        backbone_name='tf_efficientnet_lite3',
        image_size=(896, 896),
        fpn_channels=160,
        fpn_cell_repeats=6,
        box_class_repeats=4,
        act_type='relu',
        backbone_args=dict(drop_path_rate=0.2),
        url='',
    ),
    tf_efficientdet_lite4=dict(
        name='tf_efficientdet_lite4',
        backbone_name='tf_efficientnet_lite4',
        image_size=(1024, 1024),
        fpn_channels=224,
        fpn_cell_repeats=7,
        box_class_repeats=4,
        act_type='relu',
        backbone_args=dict(drop_path_rate=0.2),
        url='',
    ),
)
def get_efficientdet_config(model_name='tf_efficientdet_d1'):
    """Return the EfficientDet config for `model_name`.

    Starts from `default_detection_model_configs()` and overlays the matching
    entry of `efficientdet_model_param_dict`.

    Args:
        model_name: Key into `efficientdet_model_param_dict`.

    Returns:
        A deep copy of the merged config.

    Raises:
        KeyError: If `model_name` is not a key of `efficientdet_model_param_dict`.
    """
    config = default_detection_model_configs()
    overrides = efficientdet_model_param_dict[model_name]
    config.update(overrides)

    # An entry may override min_level / max_level, so derive num_levels again.
    level_count = config.max_level - config.min_level + 1
    config.num_levels = level_count

    # May be unnecessary; ensures no references to the param dict values remain.
    return deepcopy(config)