# Copyright (c) Facebook, Inc. and its affiliates.
import math
from functools import lru_cache
import torch
from torch import nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair
from torchvision.ops import deform_conv2d

from detectron2.utils.develop import create_dummy_class, create_dummy_func

from .wrappers import _NewEmptyTensorOp

class _DeformConv(Function):
    @staticmethod
    def forward(
        ctx,
        input,
        offset,
        weight,
        stride=1,
        padding=0,
        dilation=1,
        groups=1,
        deformable_groups=1,
        im2col_step=64,
    ):
        if input is not None and input.dim() != 4:
            raise ValueError(
                "Expected 4D tensor as input, got {}D tensor instead.".format(input.dim())
            )
        ctx.stride = _pair(stride)
        ctx.padding = _pair(padding)
        ctx.dilation = _pair(dilation)
        ctx.groups = groups
        ctx.deformable_groups = deformable_groups
        ctx.im2col_step = im2col_step

        ctx.save_for_backward(input, offset, weight)

        output = input.new_empty(
            _DeformConv._output_size(input, weight, ctx.padding, ctx.dilation, ctx.stride)
        )

        ctx.bufs_ = [input.new_empty(0), input.new_empty(0)]  # columns, ones

        if not input.is_cuda:
            # TODO: let torchvision support full features of our deformconv.
            if deformable_groups != 1:
                raise NotImplementedError(
                    "Deformable Conv with deformable_groups != 1 is not supported on CPUs!"
                )
            return deform_conv2d(
                input, offset, weight, stride=stride, padding=padding, dilation=dilation
            )
        else:
            cur_im2col_step = _DeformConv._cal_im2col_step(input.shape[0], ctx.im2col_step)
            assert (input.shape[0] % cur_im2col_step) == 0, "im2col step must divide batchsize"

            _C.deform_conv_forward(
                input,
                weight,
                offset,
                output,
                ctx.bufs_[0],
                ctx.bufs_[1],
                weight.size(3),
                weight.size(2),
                ctx.stride[1],
                ctx.stride[0],
                ctx.padding[1],
                ctx.padding[0],
                ctx.dilation[1],
                ctx.dilation[0],
                ctx.groups,
                ctx.deformable_groups,
                cur_im2col_step,
            )
        return output
    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        input, offset, weight = ctx.saved_tensors

        grad_input = grad_offset = grad_weight = None

        if not grad_output.is_cuda:
            raise NotImplementedError("Deformable Conv is not supported on CPUs!")
        else:
            cur_im2col_step = _DeformConv._cal_im2col_step(input.shape[0], ctx.im2col_step)
            assert (input.shape[0] % cur_im2col_step) == 0, "im2col step must divide batchsize"

            if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:
                grad_input = torch.zeros_like(input)
                grad_offset = torch.zeros_like(offset)
                _C.deform_conv_backward_input(
                    input,
                    offset,
                    grad_output,
                    grad_input,
                    grad_offset,
                    weight,
                    ctx.bufs_[0],
                    weight.size(3),
                    weight.size(2),
                    ctx.stride[1],
                    ctx.stride[0],
                    ctx.padding[1],
                    ctx.padding[0],
                    ctx.dilation[1],
                    ctx.dilation[0],
                    ctx.groups,
                    ctx.deformable_groups,
                    cur_im2col_step,
                )

            if ctx.needs_input_grad[2]:
                grad_weight = torch.zeros_like(weight)
                _C.deform_conv_backward_filter(
                    input,
                    offset,
                    grad_output,
                    grad_weight,
                    ctx.bufs_[0],
                    ctx.bufs_[1],
                    weight.size(3),
                    weight.size(2),
                    ctx.stride[1],
                    ctx.stride[0],
                    ctx.padding[1],
                    ctx.padding[0],
                    ctx.dilation[1],
                    ctx.dilation[0],
                    ctx.groups,
                    ctx.deformable_groups,
                    1,
                    cur_im2col_step,
                )

        return grad_input, grad_offset, grad_weight, None, None, None, None, None, None
    @staticmethod
    def _output_size(input, weight, padding, dilation, stride):
        channels = weight.size(0)
        output_size = (input.size(0), channels)
        for d in range(input.dim() - 2):
            in_size = input.size(d + 2)
            pad = padding[d]
            kernel = dilation[d] * (weight.size(d + 2) - 1) + 1
            stride_ = stride[d]
            output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1,)
        if not all(map(lambda s: s > 0, output_size)):
            raise ValueError(
                "convolution input is too small (output would be {})".format(
                    "x".join(map(str, output_size))
                )
            )
        return output_size
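    # A worked instance of the output-size formula above (illustrative numbers,
    # not from the original file): a 32x32 input with a 3x3 kernel, padding 1,
    # dilation 1 and stride 1 gives (32 + 2*1 - (1*(3-1) + 1)) // 1 + 1 = 32,
    # so the spatial shape is preserved.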
    @staticmethod
    @lru_cache(maxsize=128)
    def _cal_im2col_step(input_size, default_size):
        """
        Calculate a proper im2col step size, which should divide input_size and not be
        larger than default_size. Meanwhile the step size should be as large as possible
        to be more efficient. So we choose the largest divisor of input_size that is no
        larger than default_size.
        :param input_size: input batch size.
        :param default_size: default preferred im2col step size.
        :return: the largest proper step size.
        """
        if input_size <= default_size:
            return input_size
        best_step = 1
        for step in range(2, min(int(math.sqrt(input_size)) + 1, default_size)):
            if input_size % step == 0:
                if input_size // step <= default_size:
                    return input_size // step
                best_step = step
        return best_step
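    # Sketch of the divisor search above (illustrative values, not from the
    # original file): a batch of 32 fits under the default of 64 and is used
    # whole, while for a batch of 130 the first small divisor whose cofactor
    # fits under 64 is 5, so the step becomes 130 // 5 = 26.
    #   _DeformConv._cal_im2col_step(32, 64)   # -> 32
    #   _DeformConv._cal_im2col_step(130, 64)  # -> 26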

class _ModulatedDeformConv(Function):
    @staticmethod
    def forward(
        ctx,
        input,
        offset,
        mask,
        weight,
        bias=None,
        stride=1,
        padding=0,
        dilation=1,
        groups=1,
        deformable_groups=1,
    ):
        ctx.stride = stride
        ctx.padding = padding
        ctx.dilation = dilation
        ctx.groups = groups
        ctx.deformable_groups = deformable_groups
        ctx.with_bias = bias is not None
        if not ctx.with_bias:
            bias = input.new_empty(1)  # fake tensor
        if not input.is_cuda:
            raise NotImplementedError("Deformable Conv is not supported on CPUs!")
        if (
            weight.requires_grad
            or mask.requires_grad
            or offset.requires_grad
            or input.requires_grad
        ):
            ctx.save_for_backward(input, offset, mask, weight, bias)
        output = input.new_empty(_ModulatedDeformConv._infer_shape(ctx, input, weight))
        ctx._bufs = [input.new_empty(0), input.new_empty(0)]
        _C.modulated_deform_conv_forward(
            input,
            weight,
            bias,
            ctx._bufs[0],
            offset,
            mask,
            output,
            ctx._bufs[1],
            weight.shape[2],
            weight.shape[3],
            ctx.stride,
            ctx.stride,
            ctx.padding,
            ctx.padding,
            ctx.dilation,
            ctx.dilation,
            ctx.groups,
            ctx.deformable_groups,
            ctx.with_bias,
        )
        return output
    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        if not grad_output.is_cuda:
            raise NotImplementedError("Deformable Conv is not supported on CPUs!")
        input, offset, mask, weight, bias = ctx.saved_tensors
        grad_input = torch.zeros_like(input)
        grad_offset = torch.zeros_like(offset)
        grad_mask = torch.zeros_like(mask)
        grad_weight = torch.zeros_like(weight)
        grad_bias = torch.zeros_like(bias)
        _C.modulated_deform_conv_backward(
            input,
            weight,
            bias,
            ctx._bufs[0],
            offset,
            mask,
            ctx._bufs[1],
            grad_input,
            grad_weight,
            grad_bias,
            grad_offset,
            grad_mask,
            grad_output,
            weight.shape[2],
            weight.shape[3],
            ctx.stride,
            ctx.stride,
            ctx.padding,
            ctx.padding,
            ctx.dilation,
            ctx.dilation,
            ctx.groups,
            ctx.deformable_groups,
            ctx.with_bias,
        )
        if not ctx.with_bias:
            grad_bias = None

        return (
            grad_input,
            grad_offset,
            grad_mask,
            grad_weight,
            grad_bias,
            None,
            None,
            None,
            None,
            None,
        )
    @staticmethod
    def _infer_shape(ctx, input, weight):
        n = input.size(0)
        channels_out = weight.size(0)
        height, width = input.shape[2:4]
        kernel_h, kernel_w = weight.shape[2:4]
        height_out = (
            height + 2 * ctx.padding - (ctx.dilation * (kernel_h - 1) + 1)
        ) // ctx.stride + 1
        width_out = (
            width + 2 * ctx.padding - (ctx.dilation * (kernel_w - 1) + 1)
        ) // ctx.stride + 1
        return n, channels_out, height_out, width_out

deform_conv = _DeformConv.apply
modulated_deform_conv = _ModulatedDeformConv.apply

class DeformConv(nn.Module):
    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        padding=0,
        dilation=1,
        groups=1,
        deformable_groups=1,
        bias=False,
        norm=None,
        activation=None,
    ):
        """
        Deformable convolution from :paper:`deformconv`.

        Arguments are similar to :class:`Conv2D`. Extra arguments:

        Args:
            deformable_groups (int): number of groups used in deformable convolution.
            norm (nn.Module, optional): a normalization layer
            activation (callable(Tensor) -> Tensor): a callable activation function
        """
        super(DeformConv, self).__init__()
        assert not bias
        assert in_channels % groups == 0, "in_channels {} is not divisible by groups {}".format(
            in_channels, groups
        )
        assert (
            out_channels % groups == 0
        ), "out_channels {} is not divisible by groups {}".format(out_channels, groups)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride)
        self.padding = _pair(padding)
        self.dilation = _pair(dilation)
        self.groups = groups
        self.deformable_groups = deformable_groups
        self.norm = norm
        self.activation = activation

        self.weight = nn.Parameter(
            torch.Tensor(out_channels, in_channels // self.groups, *self.kernel_size)
        )
        self.bias = None

        nn.init.kaiming_uniform_(self.weight, nonlinearity="relu")
    def forward(self, x, offset):
        if x.numel() == 0:
            # When input is empty, we want to return an empty tensor with the
            # "correct" shape, so that the following operations will not panic
            # if they check for the shape of the tensor.
            # This computes the height and width of the output tensor.
            output_shape = [
                (i + 2 * p - (di * (k - 1) + 1)) // s + 1
                for i, p, di, k, s in zip(
                    x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride
                )
            ]
            output_shape = [x.shape[0], self.weight.shape[0]] + output_shape
            return _NewEmptyTensorOp.apply(x, output_shape)

        x = deform_conv(
            x,
            offset,
            self.weight,
            self.stride,
            self.padding,
            self.dilation,
            self.groups,
            self.deformable_groups,
        )
        if self.norm is not None:
            x = self.norm(x)
        if self.activation is not None:
            x = self.activation(x)
        return x
    def extra_repr(self):
        tmpstr = "in_channels=" + str(self.in_channels)
        tmpstr += ", out_channels=" + str(self.out_channels)
        tmpstr += ", kernel_size=" + str(self.kernel_size)
        tmpstr += ", stride=" + str(self.stride)
        tmpstr += ", padding=" + str(self.padding)
        tmpstr += ", dilation=" + str(self.dilation)
        tmpstr += ", groups=" + str(self.groups)
        tmpstr += ", deformable_groups=" + str(self.deformable_groups)
        tmpstr += ", bias=False"
        return tmpstr
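
# Example usage of DeformConv (a minimal sketch, assuming a CUDA build of the
# detectron2._C extension; `offset_conv` is a hypothetical layer, not part of
# this file). The offset tensor must carry deformable_groups * 2 * kH * kW
# channels at the output resolution:
#
#   dconv = DeformConv(64, 128, kernel_size=3, padding=1).cuda()
#   offset_conv = nn.Conv2d(64, 2 * 3 * 3, kernel_size=3, padding=1).cuda()
#   x = torch.randn(2, 64, 32, 32, device="cuda")
#   out = dconv(x, offset_conv(x))  # -> (2, 128, 32, 32)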

class ModulatedDeformConv(nn.Module):
    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        padding=0,
        dilation=1,
        groups=1,
        deformable_groups=1,
        bias=True,
        norm=None,
        activation=None,
    ):
        """
        Modulated deformable convolution from :paper:`deformconv2`.

        Arguments are similar to :class:`Conv2D`. Extra arguments:

        Args:
            deformable_groups (int): number of groups used in deformable convolution.
            norm (nn.Module, optional): a normalization layer
            activation (callable(Tensor) -> Tensor): a callable activation function
        """
        super(ModulatedDeformConv, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = _pair(kernel_size)
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.groups = groups
        self.deformable_groups = deformable_groups
        self.with_bias = bias
        self.norm = norm
        self.activation = activation

        self.weight = nn.Parameter(
            torch.Tensor(out_channels, in_channels // groups, *self.kernel_size)
        )
        if bias:
            self.bias = nn.Parameter(torch.Tensor(out_channels))
        else:
            self.bias = None

        nn.init.kaiming_uniform_(self.weight, nonlinearity="relu")
        if self.bias is not None:
            nn.init.constant_(self.bias, 0)
    def forward(self, x, offset, mask):
        if x.numel() == 0:
            # stride/padding/dilation are stored as ints here (unlike DeformConv),
            # so wrap them with _pair for the per-dimension zip below.
            output_shape = [
                (i + 2 * p - (di * (k - 1) + 1)) // s + 1
                for i, p, di, k, s in zip(
                    x.shape[-2:],
                    _pair(self.padding),
                    _pair(self.dilation),
                    self.kernel_size,
                    _pair(self.stride),
                )
            ]
            output_shape = [x.shape[0], self.weight.shape[0]] + output_shape
            return _NewEmptyTensorOp.apply(x, output_shape)

        x = modulated_deform_conv(
            x,
            offset,
            mask,
            self.weight,
            self.bias,
            self.stride,
            self.padding,
            self.dilation,
            self.groups,
            self.deformable_groups,
        )
        if self.norm is not None:
            x = self.norm(x)
        if self.activation is not None:
            x = self.activation(x)
        return x
    def extra_repr(self):
        tmpstr = "in_channels=" + str(self.in_channels)
        tmpstr += ", out_channels=" + str(self.out_channels)
        tmpstr += ", kernel_size=" + str(self.kernel_size)
        tmpstr += ", stride=" + str(self.stride)
        tmpstr += ", padding=" + str(self.padding)
        tmpstr += ", dilation=" + str(self.dilation)
        tmpstr += ", groups=" + str(self.groups)
        tmpstr += ", deformable_groups=" + str(self.deformable_groups)
        tmpstr += ", bias=" + str(self.with_bias)
        return tmpstr
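
# Example usage of ModulatedDeformConv (a minimal sketch under the same CUDA
# assumption; `offset_mask_conv` is hypothetical). Besides the 2 * kH * kW
# offset channels, it takes a kH * kW-channel modulation mask, typically
# squashed with a sigmoid as in :paper:`deformconv2`:
#
#   mdconv = ModulatedDeformConv(64, 128, kernel_size=3, padding=1).cuda()
#   offset_mask_conv = nn.Conv2d(64, 3 * 3 * 3, kernel_size=3, padding=1).cuda()
#   x = torch.randn(2, 64, 32, 32, device="cuda")
#   o_m = offset_mask_conv(x)  # 27 channels: 18 for offsets, 9 for the mask
#   offset, mask = o_m[:, :18], o_m[:, 18:].sigmoid()
#   out = mdconv(x, offset, mask)  # -> (2, 128, 32, 32)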

try:
    from detectron2 import _C
except ImportError:
    # TODO: register ops natively so there is no need to import _C.
    _msg = "detectron2 was not compiled successfully, please build it following the instructions!"
    _args = ("detectron2._C", _msg)
    DeformConv = create_dummy_class("DeformConv", *_args)
    ModulatedDeformConv = create_dummy_class("ModulatedDeformConv", *_args)
    deform_conv = create_dummy_func("deform_conv", *_args)
    modulated_deform_conv = create_dummy_func("modulated_deform_conv", *_args)