import torch
import numbers
from torch.nn.parameter import Parameter
from .module import Module
from ._functions import CrossMapLRN2d as _cross_map_lrn2d
from .. import functional as F
from .. import init
from torch import Tensor, Size
from typing import Union, List, Tuple

__all__ = ['LocalResponseNorm', 'CrossMapLRN2d', 'LayerNorm', 'GroupNorm']

class LocalResponseNorm(Module):
    r"""Applies local response normalization over an input signal.

    The input signal is composed of several input planes, where channels occupy the second dimension.
    Applies normalization across channels.

    .. math::
        b_{c} = a_{c}\left(k + \frac{\alpha}{n}
        \sum_{c'=\max(0, c-n/2)}^{\min(N-1,c+n/2)}a_{c'}^2\right)^{-\beta}

    Args:
        size: number of neighbouring channels used for normalization
        alpha: multiplicative factor. Default: 0.0001
        beta: exponent. Default: 0.75
        k: additive factor. Default: 1

    Shape:
        - Input: :math:`(N, C, *)`
        - Output: :math:`(N, C, *)` (same shape as input)

    Examples::

        >>> lrn = nn.LocalResponseNorm(2)
        >>> signal_2d = torch.randn(32, 5, 24, 24)
        >>> signal_4d = torch.randn(16, 5, 7, 7, 7, 7)
        >>> output_2d = lrn(signal_2d)
        >>> output_4d = lrn(signal_4d)
    """
    __constants__ = ['size', 'alpha', 'beta', 'k']
    size: int
    alpha: float
    beta: float
    k: float

    def __init__(self, size: int, alpha: float = 1e-4, beta: float = 0.75, k: float = 1.) -> None:
        super().__init__()
        self.size = size
        self.alpha = alpha
        self.beta = beta
        self.k = k

    def forward(self, input: Tensor) -> Tensor:
        return F.local_response_norm(input, self.size, self.alpha, self.beta,
                                     self.k)

    def extra_repr(self) -> str:
        return '{size}, alpha={alpha}, beta={beta}, k={k}'.format(**self.__dict__)
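

# Illustrative sketch, not part of the original module: a naive restatement of
# the formula documented on LocalResponseNorm above,
#     b_c = a_c * (k + alpha / n * sum_{c'} a_{c'}^2) ** (-beta),
# handy for sanity-checking F.local_response_norm on small inputs. Note that
# the built-in functional handles window alignment and channel borders via
# zero-padded average pooling, so its output may differ slightly from this
# literal reading of the formula (e.g. for even `size`).
def _reference_local_response_norm(input: Tensor, size: int, alpha: float = 1e-4,
                                   beta: float = 0.75, k: float = 1.) -> Tensor:
    out = torch.empty_like(input)
    num_channels = input.size(1)
    for c in range(num_channels):
        lo = max(0, c - size // 2)
        hi = min(num_channels - 1, c + size // 2)
        # Sum of squares over the neighbouring channels, scaled by alpha / size.
        div = input[:, lo:hi + 1].pow(2).sum(dim=1) * (alpha / size)
        out[:, c] = input[:, c] * (k + div).pow(-beta)
    return out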


class CrossMapLRN2d(Module):
    size: int
    alpha: float
    beta: float
    k: float

    def __init__(self, size: int, alpha: float = 1e-4, beta: float = 0.75, k: float = 1) -> None:
        super().__init__()
        self.size = size
        self.alpha = alpha
        self.beta = beta
        self.k = k

    def forward(self, input: Tensor) -> Tensor:
        return _cross_map_lrn2d.apply(input, self.size, self.alpha, self.beta,
                                      self.k)

    def extra_repr(self) -> str:
        return '{size}, alpha={alpha}, beta={beta}, k={k}'.format(**self.__dict__)
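

# Usage sketch (illustrative only, not from the original file): CrossMapLRN2d
# applies the same cross-channel normalization as LocalResponseNorm, but routes
# it through the dedicated _cross_map_lrn2d autograd Function and is intended
# for 4D (N, C, H, W) inputs, e.g.:
#
#     lrn = CrossMapLRN2d(size=5)
#     x = torch.randn(8, 16, 32, 32)
#     y = lrn(x)  # same shape as x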


_shape_t = Union[int, List[int], Size]


class LayerNorm(Module):
    r"""Applies Layer Normalization over a mini-batch of inputs.

    This layer implements the operation as described in
    the paper `Layer Normalization <https://arxiv.org/abs/1607.06450>`__

    .. math::
        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The mean and standard-deviation are calculated over the last `D` dimensions, where `D`
    is the dimension of :attr:`normalized_shape`. For example, if :attr:`normalized_shape`
    is ``(3, 5)`` (a 2-dimensional shape), the mean and standard-deviation are computed over
    the last 2 dimensions of the input (i.e. ``input.mean((-2, -1))``).
    :math:`\gamma` and :math:`\beta` are learnable affine transform parameters of
    :attr:`normalized_shape` if :attr:`elementwise_affine` is ``True``.
    The standard-deviation is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    .. note::
        Unlike Batch Normalization and Instance Normalization, which apply
        scalar scale and bias for each entire channel/plane with the
        :attr:`affine` option, Layer Normalization applies per-element scale and
        bias with :attr:`elementwise_affine`.

    This layer uses statistics computed from input data in both training and
    evaluation modes.

    Args:
        normalized_shape (int or list or torch.Size): input shape from an expected input
            of size

            .. math::
                [* \times \text{normalized\_shape}[0] \times \text{normalized\_shape}[1]
                    \times \ldots \times \text{normalized\_shape}[-1]]

            If a single integer is used, it is treated as a singleton list, and this module will
            normalize over the last dimension which is expected to be of that specific size.
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        elementwise_affine: a boolean value that when set to ``True``, this module
            has learnable per-element affine parameters initialized to ones (for weights)
            and zeros (for biases). Default: ``True``.
        bias: If set to ``False``, the layer will not learn an additive bias (only relevant if
            :attr:`elementwise_affine` is ``True``). Default: ``True``.

    Attributes:
        weight: the learnable weights of the module of shape
            :math:`\text{normalized\_shape}` when :attr:`elementwise_affine` is set to ``True``.
            The values are initialized to 1.
        bias: the learnable bias of the module of shape
            :math:`\text{normalized\_shape}` when :attr:`elementwise_affine` is set to ``True``.
            The values are initialized to 0.

    Shape:
        - Input: :math:`(N, *)`
        - Output: :math:`(N, *)` (same shape as input)

    Examples::

        >>> # NLP Example
        >>> batch, sentence_length, embedding_dim = 20, 5, 10
        >>> embedding = torch.randn(batch, sentence_length, embedding_dim)
        >>> layer_norm = nn.LayerNorm(embedding_dim)
        >>> # Activate module
        >>> layer_norm(embedding)
        >>>
        >>> # Image Example
        >>> N, C, H, W = 20, 5, 10, 10
        >>> input = torch.randn(N, C, H, W)
        >>> # Normalize over the last three dimensions (i.e. the channel and spatial dimensions)
        >>> # as shown in the image below
        >>> layer_norm = nn.LayerNorm([C, H, W])
        >>> output = layer_norm(input)

    .. image:: ../_static/img/nn/layer_norm.jpg
        :scale: 50 %
    """
    __constants__ = ['normalized_shape', 'eps', 'elementwise_affine']
    normalized_shape: Tuple[int, ...]
    eps: float
    elementwise_affine: bool

    def __init__(self, normalized_shape: _shape_t, eps: float = 1e-5, elementwise_affine: bool = True,
                 bias: bool = True, device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super().__init__()
        if isinstance(normalized_shape, numbers.Integral):
            # mypy error: incompatible types in assignment
            normalized_shape = (normalized_shape,)  # type: ignore[assignment]
        self.normalized_shape = tuple(normalized_shape)  # type: ignore[arg-type]
        self.eps = eps
        self.elementwise_affine = elementwise_affine
        if self.elementwise_affine:
            self.weight = Parameter(torch.empty(self.normalized_shape, **factory_kwargs))
            if bias:
                self.bias = Parameter(torch.empty(self.normalized_shape, **factory_kwargs))
            else:
                self.register_parameter('bias', None)
        else:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self) -> None:
        if self.elementwise_affine:
            init.ones_(self.weight)
            if self.bias is not None:
                init.zeros_(self.bias)

    def forward(self, input: Tensor) -> Tensor:
        return F.layer_norm(
            input, self.normalized_shape, self.weight, self.bias, self.eps)

    def extra_repr(self) -> str:
        return '{normalized_shape}, eps={eps}, ' \
            'elementwise_affine={elementwise_affine}'.format(**self.__dict__)
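

# Illustrative sketch, not part of the original module: a plain-PyTorch
# restatement of what LayerNorm computes, normalizing over the trailing
# `normalized_shape` dimensions with the biased variance estimator, as the
# docstring above describes. Useful for checking the module on small inputs.
def _reference_layer_norm(input: Tensor, normalized_shape: Tuple[int, ...],
                          weight=None, bias=None, eps: float = 1e-5) -> Tensor:
    # Normalize over the last len(normalized_shape) dimensions.
    dims = tuple(range(-len(normalized_shape), 0))
    mean = input.mean(dim=dims, keepdim=True)
    # Biased estimator, equivalent to torch.var(input, unbiased=False).
    var = input.var(dim=dims, unbiased=False, keepdim=True)
    out = (input - mean) / torch.sqrt(var + eps)
    if weight is not None:
        out = out * weight
    if bias is not None:
        out = out + bias
    return out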


class GroupNorm(Module):
    r"""Applies Group Normalization over a mini-batch of inputs.

    This layer implements the operation as described in
    the paper `Group Normalization <https://arxiv.org/abs/1803.08494>`__

    .. math::
        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The input channels are separated into :attr:`num_groups` groups, each containing
    ``num_channels / num_groups`` channels. :attr:`num_channels` must be divisible by
    :attr:`num_groups`. The mean and standard-deviation are calculated
    separately over each group. :math:`\gamma` and :math:`\beta` are learnable
    per-channel affine transform parameter vectors of size :attr:`num_channels` if
    :attr:`affine` is ``True``.
    The standard-deviation is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    This layer uses statistics computed from input data in both training and
    evaluation modes.

    Args:
        num_groups (int): number of groups to separate the channels into
        num_channels (int): number of channels expected in input
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        affine: a boolean value that when set to ``True``, this module
            has learnable per-channel affine parameters initialized to ones (for weights)
            and zeros (for biases). Default: ``True``.

    Shape:
        - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}`
        - Output: :math:`(N, C, *)` (same shape as input)

    Examples::

        >>> input = torch.randn(20, 6, 10, 10)
        >>> # Separate 6 channels into 3 groups
        >>> m = nn.GroupNorm(3, 6)
        >>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm)
        >>> m = nn.GroupNorm(6, 6)
        >>> # Put all 6 channels into a single group (equivalent with LayerNorm)
        >>> m = nn.GroupNorm(1, 6)
        >>> # Activating the module
        >>> output = m(input)
    """
    __constants__ = ['num_groups', 'num_channels', 'eps', 'affine']
    num_groups: int
    num_channels: int
    eps: float
    affine: bool

    def __init__(self, num_groups: int, num_channels: int, eps: float = 1e-5, affine: bool = True,
                 device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super().__init__()
        if num_channels % num_groups != 0:
            raise ValueError('num_channels must be divisible by num_groups')
        self.num_groups = num_groups
        self.num_channels = num_channels
        self.eps = eps
        self.affine = affine
        if self.affine:
            self.weight = Parameter(torch.empty(num_channels, **factory_kwargs))
            self.bias = Parameter(torch.empty(num_channels, **factory_kwargs))
        else:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self) -> None:
        if self.affine:
            init.ones_(self.weight)
            init.zeros_(self.bias)

    def forward(self, input: Tensor) -> Tensor:
        return F.group_norm(
            input, self.num_groups, self.weight, self.bias, self.eps)

    def extra_repr(self) -> str:
        return '{num_groups}, {num_channels}, eps={eps}, ' \
            'affine={affine}'.format(**self.__dict__)
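

# Illustrative sketch, not part of the original module: GroupNorm reshapes the
# channel dimension into `num_groups` groups, normalizes each group with the
# biased variance estimator, and then applies the per-channel affine transform.
def _reference_group_norm(input: Tensor, num_groups: int,
                          weight=None, bias=None, eps: float = 1e-5) -> Tensor:
    N, C = input.shape[0], input.shape[1]
    # Collapse each group (and all trailing spatial positions) into one axis.
    x = input.reshape(N, num_groups, -1)
    mean = x.mean(dim=-1, keepdim=True)
    var = x.var(dim=-1, unbiased=False, keepdim=True)
    x = ((x - mean) / torch.sqrt(var + eps)).reshape(input.shape)
    # Per-channel weight/bias broadcast over the spatial dimensions.
    affine_shape = (1, C) + (1,) * (input.dim() - 2)
    if weight is not None:
        x = x * weight.reshape(affine_shape)
    if bias is not None:
        x = x + bias.reshape(affine_shape)
    return x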


# TODO: ContrastiveNorm2d
# TODO: DivisiveNorm2d
# TODO: SubtractiveNorm2d