Spaces:
Running
Running
File size: 5,052 Bytes
28c256d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch
import torch.nn as nn
from mmcv.cnn import ConvModule, DepthwiseSeparableConvModule
from mmengine.model import BaseModule
from mmdet.registry import MODELS
@MODELS.register_module()
class SSDNeck(BaseModule):
"""Extra layers of SSD backbone to generate multi-scale feature maps.
Args:
in_channels (Sequence[int]): Number of input channels per scale.
out_channels (Sequence[int]): Number of output channels per scale.
level_strides (Sequence[int]): Stride of 3x3 conv per level.
level_paddings (Sequence[int]): Padding size of 3x3 conv per level.
l2_norm_scale (float|None): L2 normalization layer init scale.
If None, not use L2 normalization on the first input feature.
last_kernel_size (int): Kernel size of the last conv layer.
Default: 3.
use_depthwise (bool): Whether to use DepthwiseSeparableConv.
Default: False.
conv_cfg (dict): Config dict for convolution layer. Default: None.
norm_cfg (dict): Dictionary to construct and config norm layer.
Default: None.
act_cfg (dict): Config dict for activation layer.
Default: dict(type='ReLU').
init_cfg (dict or list[dict], optional): Initialization config dict.
"""
def __init__(self,
in_channels,
out_channels,
level_strides,
level_paddings,
l2_norm_scale=20.,
last_kernel_size=3,
use_depthwise=False,
conv_cfg=None,
norm_cfg=None,
act_cfg=dict(type='ReLU'),
init_cfg=[
dict(
type='Xavier', distribution='uniform',
layer='Conv2d'),
dict(type='Constant', val=1, layer='BatchNorm2d'),
]):
super(SSDNeck, self).__init__(init_cfg)
assert len(out_channels) > len(in_channels)
assert len(out_channels) - len(in_channels) == len(level_strides)
assert len(level_strides) == len(level_paddings)
assert in_channels == out_channels[:len(in_channels)]
if l2_norm_scale:
self.l2_norm = L2Norm(in_channels[0], l2_norm_scale)
self.init_cfg += [
dict(
type='Constant',
val=self.l2_norm.scale,
override=dict(name='l2_norm'))
]
self.extra_layers = nn.ModuleList()
extra_layer_channels = out_channels[len(in_channels):]
second_conv = DepthwiseSeparableConvModule if \
use_depthwise else ConvModule
for i, (out_channel, stride, padding) in enumerate(
zip(extra_layer_channels, level_strides, level_paddings)):
kernel_size = last_kernel_size \
if i == len(extra_layer_channels) - 1 else 3
per_lvl_convs = nn.Sequential(
ConvModule(
out_channels[len(in_channels) - 1 + i],
out_channel // 2,
1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg),
second_conv(
out_channel // 2,
out_channel,
kernel_size,
stride=stride,
padding=padding,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg))
self.extra_layers.append(per_lvl_convs)
def forward(self, inputs):
"""Forward function."""
outs = [feat for feat in inputs]
if hasattr(self, 'l2_norm'):
outs[0] = self.l2_norm(outs[0])
feat = outs[-1]
for layer in self.extra_layers:
feat = layer(feat)
outs.append(feat)
return tuple(outs)
class L2Norm(nn.Module):
def __init__(self, n_dims, scale=20., eps=1e-10):
"""L2 normalization layer.
Args:
n_dims (int): Number of dimensions to be normalized
scale (float, optional): Defaults to 20..
eps (float, optional): Used to avoid division by zero.
Defaults to 1e-10.
"""
super(L2Norm, self).__init__()
self.n_dims = n_dims
self.weight = nn.Parameter(torch.Tensor(self.n_dims))
self.eps = eps
self.scale = scale
def forward(self, x):
"""Forward function."""
# normalization layer convert to FP32 in FP16 training
x_float = x.float()
norm = x_float.pow(2).sum(1, keepdim=True).sqrt() + self.eps
return (self.weight[None, :, None, None].float().expand_as(x_float) *
x_float / norm).type_as(x)
|