Spaces:

KalbeDigitalLab
/

phbreast-bilateral-view-mammography-classifier

Build error

App Files Files Community

haritsahm committed on Apr 9, 2023

Commit

861e32a

1 Parent(s): 0fd3229

Add model files

Browse files

Files changed (7) hide show

models/__init__.py +0 -0
models/hypercomplex_layers.py +523 -0
models/hypercomplex_ops.py +905 -0
models/phc_models.py +365 -0
models/real_models.py +333 -0
utils/__init__.py +0 -0
utils/utils.py +17 -0

models/__init__.py ADDED Viewed

File without changes

models/hypercomplex_layers.py ADDED Viewed

	@@ -0,0 +1,523 @@

+# These layers are borrowed from: https://github.com/eleGAN23/HyperNets
+# by Eleonora Grassucci.
+# Please check the original repository for further explanations.
+import math
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from numpy.random import RandomState
+from torch.nn import Module, init
+from torch.nn.parameter import Parameter
+from models import hypercomplex_ops as hp_ops
+########################
+## STANDARD PHM LAYER ##
+########################
+class PHMLinear(nn.Module):
+    def __init__(self, n, in_features, out_features, cuda=True):
+        super().__init__()
+        self.n = n
+        self.in_features = in_features
+        self.out_features = out_features
+        self.cuda = cuda
+        self.bias = nn.Parameter(torch.Tensor(out_features))
+        self.A = nn.Parameter(
+            torch.nn.init.xavier_uniform_(torch.zeros((n, n, n))))
+        self.S = nn.Parameter(torch.nn.init.xavier_uniform_(
+            torch.zeros((n, self.out_features//n, self.in_features//n))))
+        self.weight = torch.zeros((self.out_features, self.in_features))
+        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
+        bound = 1 / math.sqrt(fan_in)
+        init.uniform_(self.bias, -bound, bound)
+    # adapted from Bayer Research's implementation
+    def kronecker_product1(self, a, b):
+        siz1 = torch.Size(torch.tensor(
+            a.shape[-2:]) * torch.tensor(b.shape[-2:]))
+        res = a.unsqueeze(-1).unsqueeze(-3) * b.unsqueeze(-2).unsqueeze(-4)
+        siz0 = res.shape[:-4]
+        out = res.reshape(siz0 + siz1)
+        return out
+    def kronecker_product2(self):
+        H = torch.zeros((self.out_features, self.in_features))
+        for i in range(self.n):
+            H = H + torch.kron(self.A[i], self.S[i])
+        return H
+    def forward(self, input):
+        self.weight = torch.sum(self.kronecker_product1(self.A, self.S), dim=0)
+#     self.weight = self.kronecker_product2() <- SLOWER
+        input = input.type(dtype=self.weight.type())
+        return F.linear(input, weight=self.weight, bias=self.bias)
+    def extra_repr(self) -> str:
+        return 'in_features={}, out_features={}, bias={}'.format(
+            self.in_features, self.out_features, self.bias is not None)
+    def reset_parameters(self) -> None:
+        init.kaiming_uniform_(self.A, a=math.sqrt(5))
+        init.kaiming_uniform_(self.S, a=math.sqrt(5))
+        fan_in, _ = init._calculate_fan_in_and_fan_out(self.placeholder)
+        bound = 1 / math.sqrt(fan_in)
+        init.uniform_(self.bias, -bound, bound)
+#############################
+## CONVOLUTIONAL PH LAYER ##
+#############################
+class PHConv(Module):
+    def __init__(self, n, in_features, out_features, kernel_size, padding=0, stride=1, cuda=True):
+        super().__init__()
+        self.n = n
+        self.in_features = in_features
+        self.out_features = out_features
+        self.padding = padding
+        self.stride = stride
+        self.cuda = cuda
+        self.bias = nn.Parameter(torch.Tensor(out_features))
+        self.A = nn.Parameter(
+            torch.nn.init.xavier_uniform_(torch.zeros((n, n, n))))
+        self.F = nn.Parameter(torch.nn.init.xavier_uniform_(
+            torch.zeros((n, self.out_features//n, self.in_features//n, kernel_size, kernel_size))))
+        self.weight = torch.zeros((self.out_features, self.in_features))
+        self.kernel_size = kernel_size
+        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
+        bound = 1 / math.sqrt(fan_in)
+        init.uniform_(self.bias, -bound, bound)
+    def kronecker_product1(self, A, F):
+        siz1 = torch.Size(torch.tensor(
+            A.shape[-2:]) * torch.tensor(F.shape[-4:-2]))
+        siz2 = torch.Size(torch.tensor(F.shape[-2:]))
+        res = A.unsqueeze(-1).unsqueeze(-3).unsqueeze(-1).unsqueeze(-1) * \
+            F.unsqueeze(-4).unsqueeze(-6)
+        siz0 = res.shape[:1]
+        out = res.reshape(siz0 + siz1 + siz2)
+        return out
+    def kronecker_product2(self):
+        H = torch.zeros((self.out_features, self.in_features,
+                        self.kernel_size, self.kernel_size))
+        if self.cuda:
+            H = H.cuda()
+        for i in range(self.n):
+            kron_prod = torch.kron(self.A[i], self.F[i]).view(
+                self.out_features, self.in_features, self.kernel_size, self.kernel_size)
+            H = H + kron_prod
+        return H
+    def forward(self, input):
+        self.weight = torch.sum(self.kronecker_product1(self.A, self.F), dim=0)
+        # self.weight = self.kronecker_product2()
+        # if self.cuda:
+        #    self.weight = self.weight.cuda()
+        input = input.type(dtype=self.weight.type())
+        return F.conv2d(input, weight=self.weight, stride=self.stride, padding=self.padding)
+    def extra_repr(self) -> str:
+        return 'in_features={}, out_features={}, bias={}'.format(
+            self.in_features, self.out_features, self.bias is not None)
+    def reset_parameters(self) -> None:
+        init.kaiming_uniform_(self.A, a=math.sqrt(5))
+        init.kaiming_uniform_(self.F, a=math.sqrt(5))
+        fan_in, _ = init._calculate_fan_in_and_fan_out(self.placeholder)
+        bound = 1 / math.sqrt(fan_in)
+        init.uniform_(self.bias, -bound, bound)
+class KroneckerConv(Module):
+    r"""Applies a Quaternion Convolution to the incoming data."""
+    def __init__(self, in_channels, out_channels, kernel_size, stride,
+                 dilatation=1, padding=0, groups=1, bias=True, init_criterion='glorot',
+                 weight_init='quaternion', seed=None, operation='convolution2d', rotation=False,
+                 quaternion_format=True, scale=False, learn_A=False, cuda=True, first_layer=False):
+        super().__init__()
+        self.in_channels = in_channels // 4
+        self.out_channels = out_channels // 4
+        self.stride = stride
+        self.padding = padding
+        self.groups = groups
+        self.dilatation = dilatation
+        self.init_criterion = init_criterion
+        self.weight_init = weight_init
+        self.seed = seed if seed is not None else np.random.randint(0, 1234)
+        self.rng = RandomState(self.seed)
+        self.operation = operation
+        self.rotation = rotation
+        self.quaternion_format = quaternion_format
+        self.winit = {'quaternion': hp_ops.quaternion_init,
+                      'unitary': hp_ops.unitary_init,
+                      'random': hp_ops.random_init}[self.weight_init]
+        self.scale = scale
+        self.learn_A = learn_A
+        self.cuda = cuda
+        self.first_layer = first_layer
+        (self.kernel_size, self.w_shape) = hp_ops.get_kernel_and_weight_shape(self.operation,
+                                                                              self.in_channels, self.out_channels, kernel_size)
+        self.r_weight = Parameter(torch.Tensor(*self.w_shape))
+        self.i_weight = Parameter(torch.Tensor(*self.w_shape))
+        self.j_weight = Parameter(torch.Tensor(*self.w_shape))
+        self.k_weight = Parameter(torch.Tensor(*self.w_shape))
+        if self.scale:
+            self.scale_param = Parameter(torch.Tensor(self.r_weight.shape))
+        else:
+            self.scale_param = None
+        if self.rotation:
+            self.zero_kernel = Parameter(torch.zeros(
+                self.r_weight.shape), requires_grad=False)
+        if bias:
+            self.bias = Parameter(torch.Tensor(out_channels))
+        else:
+            self.register_parameter('bias', None)
+        self.reset_parameters()
+    def reset_parameters(self):
+        hp_ops.affect_init_conv(self.r_weight, self.i_weight, self.j_weight, self.k_weight,
+                                self.kernel_size, self.winit, self.rng, self.init_criterion)
+        if self.scale_param is not None:
+            torch.nn.init.xavier_uniform_(self.scale_param.data)
+        if self.bias is not None:
+            self.bias.data.zero_()
+    def forward(self, input):
+        if self.rotation:
+            # return quaternion_conv_rotation(input, self.zero_kernel, self.r_weight, self.i_weight, self.j_weight,
+            #     self.k_weight, self.bias, self.stride, self.padding, self.groups, self.dilatation,
+            #     self.quaternion_format, self.scale_param)
+            pass
+        else:
+            return hp_ops.kronecker_conv(input, self.r_weight, self.i_weight, self.j_weight,
+                                         self.k_weight, self.bias, self.stride, self.padding, self.groups, self.dilatation, self.learn_A, self.cuda, self.first_layer)
+    def __repr__(self):
+        return self.__class__.__name__ + '(' \
+            + 'in_channels=' + str(self.in_channels) \
+            + ', out_channels=' + str(self.out_channels) \
+            + ', bias=' + str(self.bias is not None) \
+            + ', kernel_size=' + str(self.kernel_size) \
+            + ', stride=' + str(self.stride) \
+            + ', padding=' + str(self.padding) \
+            + ', init_criterion=' + str(self.init_criterion) \
+            + ', weight_init=' + str(self.weight_init) \
+            + ', seed=' + str(self.seed) \
+            + ', rotation=' + str(self.rotation) \
+            + ', q_format=' + str(self.quaternion_format) \
+            + ', operation=' + str(self.operation) + ')'
+class QuaternionTransposeConv(Module):
+    r"""Applies a Quaternion Transposed Convolution (or Deconvolution) to the incoming data."""
+    def __init__(self, in_channels, out_channels, kernel_size, stride,
+                 dilatation=1, padding=0, output_padding=0, groups=1, bias=True, init_criterion='he',
+                 weight_init='quaternion', seed=None, operation='convolution2d', rotation=False,
+                 quaternion_format=False):
+        super().__init__()
+        self.in_channels = in_channels // 4
+        self.out_channels = out_channels // 4
+        self.stride = stride
+        self.padding = padding
+        self.output_padding = output_padding
+        self.groups = groups
+        self.dilatation = dilatation
+        self.init_criterion = init_criterion
+        self.weight_init = weight_init
+        self.seed = seed if seed is not None else np.random.randint(0, 1234)
+        self.rng = RandomState(self.seed)
+        self.operation = operation
+        self.rotation = rotation
+        self.quaternion_format = quaternion_format
+        self.winit = {'quaternion': hp_ops.quaternion_init,
+                      'unitary': hp_ops.unitary_init,
+                      'random': hp_ops.random_init}[self.weight_init]
+        (self.kernel_size, self.w_shape) = hp_ops.get_kernel_and_weight_shape(self.operation,
+                                                                              self.out_channels, self.in_channels, kernel_size)
+        self.r_weight = Parameter(torch.Tensor(*self.w_shape))
+        self.i_weight = Parameter(torch.Tensor(*self.w_shape))
+        self.j_weight = Parameter(torch.Tensor(*self.w_shape))
+        self.k_weight = Parameter(torch.Tensor(*self.w_shape))
+        if bias:
+            self.bias = Parameter(torch.Tensor(out_channels))
+        else:
+            self.register_parameter('bias', None)
+        self.reset_parameters()
+    def reset_parameters(self):
+        hp_ops.affect_init_conv(self.r_weight, self.i_weight, self.j_weight, self.k_weight,
+                                self.kernel_size, self.winit, self.rng, self.init_criterion)
+        if self.bias is not None:
+            self.bias.data.zero_()
+    def forward(self, input):
+        if self.rotation:
+            return hp_ops.quaternion_tranpose_conv_rotation(input, self.r_weight, self.i_weight,
+                                                            self.j_weight, self.k_weight, self.bias, self.stride, self.padding,
+                                                            self.output_padding, self.groups, self.dilatation, self.quaternion_format)
+        else:
+            return hp_ops.quaternion_transpose_conv(input, self.r_weight, self.i_weight, self.j_weight,
+                                                    self.k_weight, self.bias, self.stride, self.padding, self.output_padding,
+                                                    self.groups, self.dilatation)
+    def __repr__(self):
+        return self.__class__.__name__ + '(' \
+            + 'in_channels=' + str(self.in_channels) \
+            + ', out_channels=' + str(self.out_channels) \
+            + ', bias=' + str(self.bias is not None) \
+            + ', kernel_size=' + str(self.kernel_size) \
+            + ', stride=' + str(self.stride) \
+            + ', padding=' + str(self.padding) \
+            + ', dilation=' + str(self.dilation) \
+            + ', init_criterion=' + str(self.init_criterion) \
+            + ', weight_init=' + str(self.weight_init) \
+            + ', seed=' + str(self.seed) \
+            + ', operation=' + str(self.operation) + ')'
+class QuaternionConv(Module):
+    r"""Applies a Quaternion Convolution to the incoming data."""
+    def __init__(self, in_channels, out_channels, kernel_size, stride,
+                 dilatation=1, padding=0, groups=1, bias=True, init_criterion='glorot',
+                 weight_init='quaternion', seed=None, operation='convolution2d', rotation=False, quaternion_format=True, scale=False):
+        super().__init__()
+        self.in_channels = in_channels // 4
+        self.out_channels = out_channels // 4
+        self.stride = stride
+        self.padding = padding
+        self.groups = groups
+        self.dilatation = dilatation
+        self.init_criterion = init_criterion
+        self.weight_init = weight_init
+        self.seed = seed if seed is not None else np.random.randint(0, 1234)
+        self.rng = RandomState(self.seed)
+        self.operation = operation
+        self.rotation = rotation
+        self.quaternion_format = quaternion_format
+        self.winit = {'quaternion': hp_ops.quaternion_init,
+                      'unitary': hp_ops.unitary_init,
+                      'random': hp_ops.random_init}[self.weight_init]
+        self.scale = scale
+        (self.kernel_size, self.w_shape) = hp_ops.get_kernel_and_weight_shape(self.operation,
+                                                                              self.in_channels, self.out_channels, kernel_size)
+        self.r_weight = Parameter(torch.Tensor(*self.w_shape))
+        self.i_weight = Parameter(torch.Tensor(*self.w_shape))
+        self.j_weight = Parameter(torch.Tensor(*self.w_shape))
+        self.k_weight = Parameter(torch.Tensor(*self.w_shape))
+        if self.scale:
+            self.scale_param = Parameter(torch.Tensor(self.r_weight.shape))
+        else:
+            self.scale_param = None
+        if self.rotation:
+            self.zero_kernel = Parameter(torch.zeros(
+                self.r_weight.shape), requires_grad=False)
+        if bias:
+            self.bias = Parameter(torch.Tensor(out_channels))
+        else:
+            self.register_parameter('bias', None)
+        self.reset_parameters()
+    def reset_parameters(self):
+        hp_ops.affect_init_conv(self.r_weight, self.i_weight, self.j_weight, self.k_weight,
+                                self.kernel_size, self.winit, self.rng, self.init_criterion)
+        if self.scale_param is not None:
+            torch.nn.init.xavier_uniform_(self.scale_param.data)
+        if self.bias is not None:
+            self.bias.data.zero_()
+    def forward(self, input):
+        if self.rotation:
+            return hp_ops.quaternion_conv_rotation(input, self.zero_kernel, self.r_weight, self.i_weight, self.j_weight,
+                                                   self.k_weight, self.bias, self.stride, self.padding, self.groups, self.dilatation,
+                                                   self.quaternion_format, self.scale_param)
+        else:
+            return hp_ops.quaternion_conv(input, self.r_weight, self.i_weight, self.j_weight,
+                                          self.k_weight, self.bias, self.stride, self.padding, self.groups, self.dilatation)
+    def __repr__(self):
+        return self.__class__.__name__ + '(' \
+            + 'in_channels=' + str(self.in_channels) \
+            + ', out_channels=' + str(self.out_channels) \
+            + ', bias=' + str(self.bias is not None) \
+            + ', kernel_size=' + str(self.kernel_size) \
+            + ', stride=' + str(self.stride) \
+            + ', padding=' + str(self.padding) \
+            + ', init_criterion=' + str(self.init_criterion) \
+            + ', weight_init=' + str(self.weight_init) \
+            + ', seed=' + str(self.seed) \
+            + ', rotation=' + str(self.rotation) \
+            + ', q_format=' + str(self.quaternion_format) \
+            + ', operation=' + str(self.operation) + ')'
+class QuaternionLinearAutograd(Module):
+    r"""Applies a quaternion linear transformation to the incoming data.
+    A custom Autograd function is call to drastically reduce the VRAM consumption. Nonetheless, computing time
+    is also slower compared to QuaternionLinear().
+    """
+    def __init__(self, in_features, out_features, bias=True,
+                 init_criterion='glorot', weight_init='quaternion',
+                 seed=None, rotation=False, quaternion_format=True, scale=False):
+        super().__init__()
+        self.in_features = in_features//4
+        self.out_features = out_features//4
+        self.rotation = rotation
+        self.quaternion_format = quaternion_format
+        self.r_weight = Parameter(torch.Tensor(
+            self.in_features, self.out_features))
+        self.i_weight = Parameter(torch.Tensor(
+            self.in_features, self.out_features))
+        self.j_weight = Parameter(torch.Tensor(
+            self.in_features, self.out_features))
+        self.k_weight = Parameter(torch.Tensor(
+            self.in_features, self.out_features))
+        self.scale = scale
+        if self.scale:
+            self.scale_param = Parameter(torch.Tensor(
+                self.in_features, self.out_features))
+        else:
+            self.scale_param = None
+        if self.rotation:
+            self.zero_kernel = Parameter(torch.zeros(
+                self.r_weight.shape), requires_grad=False)
+        if bias:
+            self.bias = Parameter(torch.Tensor(self.out_features*4))
+        else:
+            self.register_parameter('bias', None)
+        self.init_criterion = init_criterion
+        self.weight_init = weight_init
+        self.seed = seed if seed is not None else np.random.randint(0, 1234)
+        self.rng = RandomState(self.seed)
+        self.reset_parameters()
+    def reset_parameters(self):
+        winit = {'quaternion': hp_ops.quaternion_init, 'unitary': hp_ops.unitary_init,
+                 'random': hp_ops.random_init}[self.weight_init]
+        if self.scale_param is not None:
+            torch.nn.init.xavier_uniform_(self.scale_param.data)
+        if self.bias is not None:
+            self.bias.data.fill_(0)
+        hp_ops.affect_init(self.r_weight, self.i_weight, self.j_weight, self.k_weight, winit,
+                           self.rng, self.init_criterion)
+    def forward(self, input):
+        # See the autograd section for explanation of what happens here.
+        if self.rotation:
+            return hp_ops.quaternion_linear_rotation(input, self.zero_kernel, self.r_weight, self.i_weight, self.j_weight, self.k_weight, self.bias, self.quaternion_format, self.scale_param)
+        else:
+            return hp_ops.quaternion_linear(input, self.r_weight, self.i_weight, self.j_weight, self.k_weight, self.bias)
+    def __repr__(self):
+        return self.__class__.__name__ + '(' \
+            + 'in_features=' + str(self.in_features) \
+            + ', out_features=' + str(self.out_features) \
+            + ', bias=' + str(self.bias is not None) \
+            + ', init_criterion=' + str(self.init_criterion) \
+            + ', weight_init=' + str(self.weight_init) \
+            + ', rotation=' + str(self.rotation) \
+            + ', seed=' + str(self.seed) + ')'
+class QuaternionLinear(Module):
+    r"""Applies a quaternion linear transformation to the incoming data."""
+    def __init__(self, in_features, out_features, bias=True,
+                 init_criterion='he', weight_init='quaternion',
+                 seed=None):
+        super().__init__()
+        self.in_features = in_features//4
+        self.out_features = out_features//4
+        self.r_weight = Parameter(torch.Tensor(
+            self.in_features, self.out_features))
+        self.i_weight = Parameter(torch.Tensor(
+            self.in_features, self.out_features))
+        self.j_weight = Parameter(torch.Tensor(
+            self.in_features, self.out_features))
+        self.k_weight = Parameter(torch.Tensor(
+            self.in_features, self.out_features))
+        if bias:
+            self.bias = Parameter(torch.Tensor(self.out_features*4))
+        else:
+            self.register_parameter('bias', None)
+        self.init_criterion = init_criterion
+        self.weight_init = weight_init
+        self.seed = seed if seed is not None else np.random.randint(0, 1234)
+        self.rng = RandomState(self.seed)
+        self.reset_parameters()
+    def reset_parameters(self):
+        winit = {'quaternion': hp_ops.quaternion_init,
+                 'unitary': hp_ops.unitary_init}[self.weight_init]
+        if self.bias is not None:
+            self.bias.data.fill_(0)
+        affect_init(self.r_weight, self.i_weight, self.j_weight, self.k_weight, winit,
+                    self.rng, self.init_criterion)
+    def forward(self, input):
+        # See the autograd section for explanation of what happens here.
+        if input.dim() == 3:
+            T, N, C = input.size()
+            input = input.view(T * N, C)
+            output = hp_ops.QuaternionLinearFunction.apply(
+                input, self.r_weight, self.i_weight, self.j_weight, self.k_weight, self.bias)
+            output = output.view(T, N, output.size(1))
+        elif input.dim() == 2:
+            output = hp_ops.QuaternionLinearFunction.apply(
+                input, self.r_weight, self.i_weight, self.j_weight, self.k_weight, self.bias)
+        else:
+            raise NotImplementedError
+        return output
+    def __repr__(self):
+        return self.__class__.__name__ + '(' \
+            + 'in_features=' + str(self.in_features) \
+            + ', out_features=' + str(self.out_features) \
+            + ', bias=' + str(self.bias is not None) \
+            + ', init_criterion=' + str(self.init_criterion) \
+            + ', weight_init=' + str(self.weight_init) \
+            + ', seed=' + str(self.seed) + ')'

models/hypercomplex_ops.py ADDED Viewed

	@@ -0,0 +1,905 @@

+##########################################################
+# pytorch-qnn v1.0
+# Titouan Parcollet
+# LIA, Université d'Avignon et des Pays du Vaucluse
+# ORKIS, Aix-en-provence
+# October 2018
+##########################################################
+import numpy as np
+import torch
+import torch.nn.functional as F
+from numpy.random import RandomState
+from scipy.stats import chi
+from torch.autograd import Variable
+def q_normalize(input, channel=1):
+    r = get_r(input)
+    i = get_i(input)
+    j = get_j(input)
+    k = get_k(input)
+    norm = torch.sqrt(r*r + i*i + j*j + k*k + 0.0001)
+    r = r / norm
+    i = i / norm
+    j = j / norm
+    k = k / norm
+    return torch.cat([r, i, j, k], dim=channel)
+def check_input(input):
+    if input.dim() not in {2, 3, 4, 5}:
+        raise RuntimeError(
+            'Quaternion linear accepts only input of dimension 2 or 3. Quaternion conv accepts up to 5 dim '
+            ' input.dim = ' + str(input.dim())
+        )
+    if input.dim() < 4:
+        nb_hidden = input.size()[-1]
+    else:
+        nb_hidden = input.size()[1]
+    if nb_hidden % 4 != 0:
+        raise RuntimeError(
+            'Quaternion Tensors must be divisible by 4.'
+            ' input.size()[1] = ' + str(nb_hidden)
+        )
+#
+# Getters
+#
+def get_r(input):
+    check_input(input)
+    if input.dim() < 4:
+        nb_hidden = input.size()[-1]
+    else:
+        nb_hidden = input.size()[1]
+    if input.dim() == 2:
+        return input.narrow(1, 0, nb_hidden // 4)
+    if input.dim() == 3:
+        return input.narrow(2, 0, nb_hidden // 4)
+    if input.dim() >= 4:
+        return input.narrow(1, 0, nb_hidden // 4)
+def get_i(input):
+    if input.dim() < 4:
+        nb_hidden = input.size()[-1]
+    else:
+        nb_hidden = input.size()[1]
+    if input.dim() == 2:
+        return input.narrow(1, nb_hidden // 4, nb_hidden // 4)
+    if input.dim() == 3:
+        return input.narrow(2, nb_hidden // 4, nb_hidden // 4)
+    if input.dim() >= 4:
+        return input.narrow(1, nb_hidden // 4, nb_hidden // 4)
+def get_j(input):
+    check_input(input)
+    if input.dim() < 4:
+        nb_hidden = input.size()[-1]
+    else:
+        nb_hidden = input.size()[1]
+    if input.dim() == 2:
+        return input.narrow(1, nb_hidden // 2, nb_hidden // 4)
+    if input.dim() == 3:
+        return input.narrow(2, nb_hidden // 2, nb_hidden // 4)
+    if input.dim() >= 4:
+        return input.narrow(1, nb_hidden // 2, nb_hidden // 4)
+def get_k(input):
+    check_input(input)
+    if input.dim() < 4:
+        nb_hidden = input.size()[-1]
+    else:
+        nb_hidden = input.size()[1]
+    if input.dim() == 2:
+        return input.narrow(1, nb_hidden - nb_hidden // 4, nb_hidden // 4)
+    if input.dim() == 3:
+        return input.narrow(2, nb_hidden - nb_hidden // 4, nb_hidden // 4)
+    if input.dim() >= 4:
+        return input.narrow(1, nb_hidden - nb_hidden // 4, nb_hidden // 4)
+def get_modulus(input, vector_form=False):
+    check_input(input)
+    r = get_r(input)
+    i = get_i(input)
+    j = get_j(input)
+    k = get_k(input)
+    if vector_form:
+        return torch.sqrt(r * r + i * i + j * j + k * k)
+    else:
+        return torch.sqrt((r * r + i * i + j * j + k * k).sum(dim=0))
+def get_normalized(input, eps=0.0001):
+    check_input(input)
+    data_modulus = get_modulus(input)
+    if input.dim() == 2:
+        data_modulus_repeated = data_modulus.repeat(1, 4)
+    elif input.dim() == 3:
+        data_modulus_repeated = data_modulus.repeat(1, 1, 4)
+    return input / (data_modulus_repeated.expand_as(input) + eps)
+def quaternion_exp(input):
+    r = get_r(input)
+    i = get_i(input)
+    j = get_j(input)
+    k = get_k(input)
+    norm_v = torch.sqrt(i*i+j*j+k*k) + 0.0001
+    exp = torch.exp(r)
+    r = torch.cos(norm_v)
+    i = (i / norm_v) * torch.sin(norm_v)
+    j = (j / norm_v) * torch.sin(norm_v)
+    k = (k / norm_v) * torch.sin(norm_v)
+    return torch.cat([exp*r, exp*i, exp*j, exp*k], dim=1)
+def kronecker_conv(input, r_weight, i_weight, j_weight, k_weight, bias, stride,
+                   padding, groups, dilatation, learn_A, cuda, first_layer=False):  # ,
+    # mat1_learn, mat2_learn, mat3_learn, mat4_learn):
+    """Applies a quaternion convolution to the incoming data:"""
+    # Define the initial matrices to build the Hamilton product
+    if first_layer:
+        mat1 = torch.zeros((4, 4), requires_grad=False).view(4, 4, 1, 1)
+    else:
+        mat1 = torch.eye(4, requires_grad=False).view(4, 4, 1, 1)
+    # Define the four matrices that summed up build the Hamilton product rule.
+    mat2 = torch.tensor([[0, -1, 0, 0],
+                        [1, 0, 0, 0],
+                        [0, 0, 0, -1],
+                        [0, 0, 1, 0]], requires_grad=False).view(4, 4, 1, 1)
+    mat3 = torch.tensor([[0, 0, -1, 0],
+                        [0, 0, 0, 1],
+                        [1, 0, 0, 0],
+                        [0, -1, 0, 0]], requires_grad=False).view(4, 4, 1, 1)
+    mat4 = torch.tensor([[0, 0, 0, -1],
+                        [0, 0, -1, 0],
+                        [0, 1, 0, 0],
+                        [1, 0, 0, 0]], requires_grad=False).view(4, 4, 1, 1)
+    if cuda:
+        mat1, mat2, mat3, mat4 = mat1.cuda(), mat2.cuda(), mat3.cuda(), mat4.cuda()
+    # Sum of kronecker product between the four matrices and the learnable weights.
+    cat_kernels_4_quaternion = torch.kron(mat1, r_weight) + \
+        torch.kron(mat2, i_weight) + \
+        torch.kron(mat3, j_weight) + \
+        torch.kron(mat4, k_weight)
+    if input.dim() == 3:
+        convfunc = F.conv1d
+    elif input.dim() == 4:
+        convfunc = F.conv2d
+    elif input.dim() == 5:
+        convfunc = F.conv3d
+    else:
+        raise Exception('The convolutional input is either 3, 4 or 5 dimensions.'
+                        ' input.dim = ' + str(input.dim()))
+    return convfunc(input, cat_kernels_4_quaternion, bias, stride, padding, dilatation, groups)
+def quaternion_conv(input, r_weight, i_weight, j_weight, k_weight, bias, stride,
+                    padding, groups, dilatation):
+    """Applies a quaternion convolution to the incoming data:"""
+    cat_kernels_4_r = torch.cat(
+        [r_weight, -i_weight, -j_weight, -k_weight], dim=1)
+    cat_kernels_4_i = torch.cat(
+        [i_weight,  r_weight, -k_weight, j_weight], dim=1)
+    cat_kernels_4_j = torch.cat(
+        [j_weight,  k_weight, r_weight, -i_weight], dim=1)
+    cat_kernels_4_k = torch.cat(
+        [k_weight,  -j_weight, i_weight, r_weight], dim=1)
+    cat_kernels_4_quaternion = torch.cat(
+        [cat_kernels_4_r, cat_kernels_4_i, cat_kernels_4_j, cat_kernels_4_k], dim=0)
+    if input.dim() == 3:
+        convfunc = F.conv1d
+    elif input.dim() == 4:
+        convfunc = F.conv2d
+    elif input.dim() == 5:
+        convfunc = F.conv3d
+    else:
+        raise Exception('The convolutional input is either 3, 4 or 5 dimensions.'
+                        ' input.dim = ' + str(input.dim()))
+    return convfunc(input, cat_kernels_4_quaternion, bias, stride, padding, dilatation, groups)
+def quaternion_transpose_conv(input, r_weight, i_weight, j_weight, k_weight, bias, stride,
+                              padding, output_padding, groups, dilatation):
+    """Applies a quaternion transposed convolution to the incoming data:"""
+    cat_kernels_4_r = torch.cat(
+        [r_weight, -i_weight, -j_weight, -k_weight], dim=1)
+    cat_kernels_4_i = torch.cat(
+        [i_weight,  r_weight, -k_weight, j_weight], dim=1)
+    cat_kernels_4_j = torch.cat(
+        [j_weight,  k_weight, r_weight, -i_weight], dim=1)
+    cat_kernels_4_k = torch.cat(
+        [k_weight,  -j_weight, i_weight, r_weight], dim=1)
+    cat_kernels_4_quaternion = torch.cat(
+        [cat_kernels_4_r, cat_kernels_4_i, cat_kernels_4_j, cat_kernels_4_k], dim=0)
+    if input.dim() == 3:
+        convfunc = F.conv_transpose1d
+    elif input.dim() == 4:
+        convfunc = F.conv_transpose2d
+    elif input.dim() == 5:
+        convfunc = F.conv_transpose3d
+    else:
+        raise Exception('The convolutional input is either 3, 4 or 5 dimensions.'
+                        ' input.dim = ' + str(input.dim()))
+    return convfunc(input, cat_kernels_4_quaternion,
+                    bias, stride, padding, output_padding, groups, dilatation)
def quaternion_conv_rotation(input, zero_kernel, r_weight, i_weight, j_weight, k_weight, bias, stride,
                             padding, groups, dilatation, quaternion_format, scale=None):
    """Apply a quaternion rotation expressed as a convolution.

    The rotation W*x*W^t can be replaced by R*x following:
    https://en.wikipedia.org/wiki/Quaternions_and_spatial_rotation
    The weights are normalised inside the function, so both unitary and
    non-unitary weights are accepted.
    The initial size of the input must be a multiple of 3 if
    quaternion_format = False and 4 if quaternion_format = True.

    When ``scale`` is given, every entry of the 3x3 rotation matrix is
    multiplied by it.  With ``quaternion_format`` the matrix is padded with
    ``zero_kernel`` blocks to 4x4 (first block row and first block column are
    zero).

    Raises:
        Exception: if ``input`` does not have 3, 4 or 5 dimensions.
    """
    # Normalise the quaternion weights; the 0.0001 keeps the sqrt away from 0.
    magnitude = torch.sqrt(
        r_weight*r_weight + i_weight*i_weight + j_weight*j_weight + k_weight*k_weight + 0.0001)
    unit_r = r_weight / magnitude
    unit_i = i_weight / magnitude
    unit_j = j_weight / magnitude
    unit_k = k_weight / magnitude

    two = 2.0
    ii = two*(unit_i*unit_i)
    jj = two*(unit_j*unit_j)
    kk = two*(unit_k*unit_k)
    ri = two*unit_r*unit_i
    rj = two*unit_r*unit_j
    rk = two*unit_r*unit_k
    ij = two*unit_i*unit_j
    ik = two*unit_i*unit_k
    jk = two*unit_j*unit_k

    # Entries of the 3x3 rotation matrix, one list per matrix row.
    matrix_rows = [
        [1.0 - (jj + kk), ij - rk, ik + rj],
        [ij + rk, 1.0 - (ii + kk), jk - ri],
        [ik - rj, jk + ri, 1.0 - (ii + jj)],
    ]
    if scale is not None:
        matrix_rows = [[scale * entry for entry in row] for row in matrix_rows]

    if quaternion_format:
        padded_rows = [torch.cat([zero_kernel] + row, dim=1) for row in matrix_rows]
        blank_row = torch.cat([zero_kernel] * 4, dim=1)
        rotation_kernel = torch.cat([blank_row] + padded_rows, dim=0)
    else:
        # NOTE(review): entries are joined along dim 0 here (not dim 1 as in
        # the quaternion_format branch); kept as in the original - confirm
        # this layout is intended.
        stacked_rows = [torch.cat(row, dim=0) for row in matrix_rows]
        rotation_kernel = torch.cat(stacked_rows, dim=0)

    conv_ops = {3: F.conv1d, 4: F.conv2d, 5: F.conv3d}
    rank = input.dim()
    if rank not in conv_ops:
        raise Exception('The convolutional input is either 3, 4 or 5 dimensions.'
                        ' input.dim = ' + str(input.dim()))
    return conv_ops[rank](input, rotation_kernel, bias, stride, padding, dilatation, groups)
def quaternion_transpose_conv_rotation(
        input, zero_kernel, r_weight, i_weight, j_weight, k_weight, bias, stride,
        padding, output_padding, groups, dilatation, quaternion_format):
    """Apply a quaternion rotation expressed as a transposed convolution.

    The rotation W*x*W^t can be replaced by R*x following:
    https://en.wikipedia.org/wiki/Quaternions_and_spatial_rotation
    The weights are normalised inside the function, so both unitary and
    non-unitary weights are accepted.
    The initial size of the input must be a multiple of 3 if
    quaternion_format = False and 4 if quaternion_format = True.

    With ``quaternion_format`` the 3x3 rotation matrix is padded to 4x4: a
    ``zero_kernel`` block is prepended to every row and an all-zero block row
    is placed first.

    Raises:
        Exception: if ``input`` does not have 3, 4 or 5 dimensions.
    """
    # Normalise the quaternion weights; the 0.0001 keeps the sqrt away from 0.
    norm = torch.sqrt(
        r_weight*r_weight + i_weight*i_weight + j_weight*j_weight + k_weight*k_weight + 0.0001)
    r_n = r_weight / norm
    i_n = i_weight / norm
    j_n = j_weight / norm
    k_n = k_weight / norm

    norm_factor = 2.0
    square_i = norm_factor*(i_n*i_n)
    square_j = norm_factor*(j_n*j_n)
    square_k = norm_factor*(k_n*k_n)
    ri = norm_factor*r_n*i_n
    rj = norm_factor*r_n*j_n
    rk = norm_factor*r_n*k_n
    ij = norm_factor*i_n*j_n
    ik = norm_factor*i_n*k_n
    jk = norm_factor*j_n*k_n

    # Entries of the 3x3 rotation matrix, one list per matrix row.
    rotation = [
        [1.0 - (square_j + square_k), ij - rk, ik + rj],
        [ij + rk, 1.0 - (square_i + square_k), jk - ri],
        [ik - rj, jk + ri, 1.0 - (square_i + square_j)],
    ]

    if quaternion_format:
        rows = [torch.cat([zero_kernel] + row, dim=1) for row in rotation]
        # zeros_like follows the device and dtype of the weights; the previous
        # torch.zeros(...).cuda() failed on CPU-only machines.
        zero_row = torch.zeros_like(rows[0])
        global_rot_kernel = torch.cat([zero_row] + rows, dim=0)
    else:
        rows = [torch.cat(row, dim=1) for row in rotation]
        global_rot_kernel = torch.cat(rows, dim=0)

    if input.dim() == 3:
        convfunc = F.conv_transpose1d
    elif input.dim() == 4:
        convfunc = F.conv_transpose2d
    elif input.dim() == 5:
        convfunc = F.conv_transpose3d
    else:
        raise Exception('The convolutional input is either 3, 4 or 5 dimensions.'
                        ' input.dim = ' + str(input.dim()))
    # The kernel built above is global_rot_kernel; the previous code passed an
    # undefined name here and raised NameError on every call.
    return convfunc(input, global_rot_kernel, bias, stride, padding, output_padding, groups, dilatation)
def quaternion_linear(input, r_weight, i_weight, j_weight, k_weight, bias=True):
    """Applies a quaternion linear transformation to the incoming data:
    It is important to notice that the forward phase of a QNN is defined
    as W * Inputs (with * equal to the Hamilton product). The constructed
    cat_kernels_4_quaternion is a modified version of the quaternion representation
    so when we do torch.mm(Input,W) it's equivalent to W * Inputs.

    Args:
        input: tensor multiplied on the left of the assembled weight matrix.
        r_weight, i_weight, j_weight, k_weight: the four component weight
            matrices; they are joined along dim 0 into block columns and the
            columns are joined along dim 1.
        bias: bias tensor added to the product, or ``None`` for no bias.  A
            plain boolean (including the historical default ``True``) carries
            no values to add and is treated like ``None``.

    Returns:
        ``input @ W`` (plus ``bias`` when a bias tensor is given).
    """
    # The default bias=True used to reach torch.addmm(True, ...) and crash;
    # a boolean is not a usable bias, so fall back to the bias-free path.
    if isinstance(bias, bool):
        bias = None

    cat_kernels_4_r = torch.cat(
        [r_weight, -i_weight, -j_weight, -k_weight], dim=0)
    cat_kernels_4_i = torch.cat(
        [i_weight, r_weight, -k_weight, j_weight], dim=0)
    cat_kernels_4_j = torch.cat(
        [j_weight, k_weight, r_weight, -i_weight], dim=0)
    cat_kernels_4_k = torch.cat(
        [k_weight, -j_weight, i_weight, r_weight], dim=0)
    cat_kernels_4_quaternion = torch.cat(
        [cat_kernels_4_r, cat_kernels_4_i, cat_kernels_4_j, cat_kernels_4_k], dim=1)

    if input.dim() == 2:
        if bias is None:
            return torch.mm(input, cat_kernels_4_quaternion)
        return torch.addmm(bias, input, cat_kernels_4_quaternion)

    output = torch.matmul(input, cat_kernels_4_quaternion)
    if bias is None:
        return output
    return output+bias
def quaternion_linear_rotation(input, zero_kernel, r_weight, i_weight, j_weight, k_weight, bias=None,
                               quaternion_format=False, scale=None):
    """Apply a quaternion rotation expressed as a matrix product.

    The rotation W*x*W^t can be replaced by R*x following:
    https://en.wikipedia.org/wiki/Quaternions_and_spatial_rotation
    The weights are normalised inside the function, so both unitary and
    non-unitary weights are accepted.
    The initial size of the input must be a multiple of 3 if
    quaternion_format = False and 4 if quaternion_format = True.

    When ``scale`` is given, every entry of the 3x3 rotation matrix is
    multiplied by it.  With ``quaternion_format`` the matrix is padded with
    ``zero_kernel`` blocks to 4x4.  ``bias`` is added to the product when it
    is not ``None``.
    """
    # Normalise the quaternion weights; the 0.0001 keeps the sqrt away from 0.
    magnitude = torch.sqrt(
        r_weight*r_weight + i_weight*i_weight + j_weight*j_weight + k_weight*k_weight + 0.0001)
    unit_r = r_weight / magnitude
    unit_i = i_weight / magnitude
    unit_j = j_weight / magnitude
    unit_k = k_weight / magnitude

    two = 2.0
    ii = two*(unit_i*unit_i)
    jj = two*(unit_j*unit_j)
    kk = two*(unit_k*unit_k)
    ri = two*unit_r*unit_i
    rj = two*unit_r*unit_j
    rk = two*unit_r*unit_k
    ij = two*unit_i*unit_j
    ik = two*unit_i*unit_k
    jk = two*unit_j*unit_k

    # Entries of the 3x3 rotation matrix; each list becomes one block column
    # of the final weight matrix (entries joined on dim 0, columns on dim 1).
    matrix_entries = [
        [1.0 - (jj + kk), ij - rk, ik + rj],
        [ij + rk, 1.0 - (ii + kk), jk - ri],
        [ik - rj, jk + ri, 1.0 - (ii + jj)],
    ]
    if scale is not None:
        matrix_entries = [[scale * entry for entry in group] for group in matrix_entries]

    if quaternion_format:
        padded = [torch.cat([zero_kernel] + group, dim=0) for group in matrix_entries]
        blank = torch.cat([zero_kernel] * 4, dim=0)
        rotation_matrix = torch.cat([blank] + padded, dim=1)
    else:
        plain = [torch.cat(group, dim=0) for group in matrix_entries]
        rotation_matrix = torch.cat(plain, dim=1)

    if input.dim() == 2:
        if bias is None:
            return torch.mm(input, rotation_matrix)
        return torch.addmm(bias, input, rotation_matrix)

    product = torch.matmul(input, rotation_matrix)
    if bias is None:
        return product
    return product+bias
+# Custom AUTOGRAD for lower VRAM consumption
class QuaternionLinearFunction(torch.autograd.Function):
    """Quaternion linear layer written as a custom autograd function.

    ``forward`` assembles the real-valued block weight matrix from the four
    component weights and multiplies the input by it; ``backward`` rebuilds
    the matrices it needs from the saved tensors and returns one gradient per
    forward argument.
    """

    @staticmethod
    def forward(ctx, input, r_weight, i_weight, j_weight, k_weight, bias=None):
        """Return ``input @ W`` (plus ``bias`` when it is not ``None``)."""
        ctx.save_for_backward(input, r_weight, i_weight,
                              j_weight, k_weight, bias)
        check_input(input)
        # Each block column joins four components on dim 0; the four columns
        # are then joined on dim 1.
        block_columns = [
            torch.cat([r_weight, -i_weight, -j_weight, -k_weight], dim=0),
            torch.cat([i_weight, r_weight, -k_weight, j_weight], dim=0),
            torch.cat([j_weight, k_weight, r_weight, -i_weight], dim=0),
            torch.cat([k_weight, -j_weight, i_weight, r_weight], dim=0),
        ]
        weight_matrix = torch.cat(block_columns, dim=1)

        if input.dim() == 2:
            if bias is None:
                return torch.mm(input, weight_matrix)
            return torch.addmm(bias, input, weight_matrix)

        product = torch.matmul(input, weight_matrix)
        if bias is None:
            return product
        return product+bias

    # This function has only a single output, so it gets only one gradient
    @staticmethod
    def backward(ctx, grad_output):
        """Return gradients for (input, r, i, j, k weights, bias).

        Entries whose ``ctx.needs_input_grad`` flag is not set stay ``None``.
        All four weight gradients are gated on the flag of ``r_weight``.
        """
        input, r_weight, i_weight, j_weight, k_weight, bias = ctx.saved_tensors
        grad_input = None
        grad_weight_r = grad_weight_i = grad_weight_j = grad_weight_k = None
        grad_bias = None

        # Transposed block weight matrix, used for the input gradient.
        weight_columns = [
            torch.cat([r_weight, -i_weight, -j_weight, -k_weight], dim=0),
            torch.cat([i_weight, r_weight, -k_weight, j_weight], dim=0),
            torch.cat([j_weight, k_weight, r_weight, -i_weight], dim=0),
            torch.cat([k_weight, -j_weight, i_weight, r_weight], dim=0),
        ]
        cat_kernels_4_quaternion_T = Variable(
            torch.cat(weight_columns, dim=1).permute(1, 0), requires_grad=False)

        # Same block layout applied to the components of the saved input.
        in_r = get_r(input)
        in_i = get_i(input)
        in_j = get_j(input)
        in_k = get_k(input)
        input_columns = [
            torch.cat([in_r, -in_i, -in_j, -in_k], dim=0),
            torch.cat([in_i, in_r, -in_k, in_j], dim=0),
            torch.cat([in_j, in_k, in_r, -in_i], dim=0),
            torch.cat([in_k, -in_j, in_i, in_r], dim=0),
        ]
        input_mat = Variable(
            torch.cat(input_columns, dim=1), requires_grad=False)

        # Block matrix of the incoming gradient: components joined on dim 1
        # into rows, rows stacked on dim 0, with the sign pattern below.
        g_r = get_r(grad_output)
        g_i = get_i(grad_output)
        g_j = get_j(grad_output)
        g_k = get_k(grad_output)
        grad_rows = [
            torch.cat([g_r, g_i, g_j, g_k], dim=1),
            torch.cat([-g_i, g_r, g_k, -g_j], dim=1),
            torch.cat([-g_j, -g_k, g_r, g_i], dim=1),
            torch.cat([-g_k, g_j, -g_i, g_r], dim=1),
        ]
        grad_mat = torch.cat(grad_rows, dim=0)

        if ctx.needs_input_grad[0]:
            grad_input = grad_output.mm(cat_kernels_4_quaternion_T)
        if ctx.needs_input_grad[1]:
            grad_weight = grad_mat.permute(1, 0).mm(input_mat).permute(1, 0)
            rows = r_weight.size(0)
            cols = r_weight.size(1)
            # The component gradients sit side by side in the first `rows`
            # rows of grad_weight.
            top_band = grad_weight.narrow(0, 0, rows)
            grad_weight_r = top_band.narrow(1, 0, cols)
            grad_weight_i = top_band.narrow(1, cols, cols)
            grad_weight_j = top_band.narrow(1, cols*2, cols)
            grad_weight_k = top_band.narrow(1, cols*3, cols)
        if ctx.needs_input_grad[5]:
            grad_bias = grad_output.sum(0).squeeze(0)
        return grad_input, grad_weight_r, grad_weight_i, grad_weight_j, grad_weight_k, grad_bias
def hamilton_product(q0, q1):
    """
    Applies a Hamilton product q0 * q1:
    Shape:
        - q0, q1 should be (batch_size, quaternion_number)
        (rr' - xx' - yy' - zz')  +
        (rx' + xr' + yz' - zy')i +
        (ry' - xz' + yr' + zx')j +
        (rz' + xy' - yx' + zr')k +

    Each output component is obtained by multiplying q0 element-wise with a
    re-ordered copy of q1's components and combining the four parts of that
    product with the signs listed above.
    """
    r1 = get_r(q1)
    x1 = get_i(q1)
    y1 = get_j(q1)
    z1 = get_k(q1)

    def _combine(reordered_q1, sign_i, sign_j, sign_k):
        # Element-wise product, then signed sum of its r/i/j/k parts.
        prod = torch.mul(q0, reordered_q1)
        total = get_r(prod)
        for sign, part in ((sign_i, get_i(prod)), (sign_j, get_j(prod)), (sign_k, get_k(prod))):
            total = total + part if sign > 0 else total - part
        return total

    # rr' - xx' - yy' - zz'
    real = _combine(q1, -1, -1, -1)
    # rx' + xr' + yz' - zy'
    imag_i = _combine(torch.cat([x1, r1, z1, y1], dim=1), +1, +1, -1)
    # ry' - xz' + yr' + zx'
    imag_j = _combine(torch.cat([y1, z1, r1, x1], dim=1), -1, +1, +1)
    # rz' + xy' - yx' + zr'
    imag_k = _combine(torch.cat([z1, y1, x1, r1], dim=1), +1, -1, +1)
    return torch.cat([real, imag_i, imag_j, imag_k], dim=1)
+#
+# PARAMETERS INITIALIZATION
+#
def unitary_init(in_features, out_features, rng, kernel_size=None, criterion='he'):
    """Draw four weight arrays that form (approximately) unit quaternions.

    Each component is sampled uniformly in [-1, 1) and every quaternion is
    divided by its norm plus 0.0001.

    Args:
        in_features, out_features: sizes used to build the kernel shape.
        rng: accepted for signature compatibility; sampling uses the global
            ``np.random`` state.
        kernel_size: ``None`` for a ``(in_features, out_features)`` matrix,
            otherwise an int or a sequence appended to
            ``(out_features, in_features)``.
        criterion: accepted for signature compatibility; not used.

    Returns:
        Tuple ``(v_r, v_i, v_j, v_k)`` of arrays with the kernel shape.
    """
    if kernel_size is None:
        kernel_shape = (in_features, out_features)
    elif isinstance(kernel_size, int):
        kernel_shape = (out_features, in_features, kernel_size)
    else:
        kernel_shape = (out_features, in_features) + tuple(kernel_size)

    number_of_weights = np.prod(kernel_shape)
    # Drawn in r, i, j, k order so a seeded global RNG yields the same values
    # as before.
    v_r = np.random.uniform(-1.0, 1.0, number_of_weights)
    v_i = np.random.uniform(-1.0, 1.0, number_of_weights)
    v_j = np.random.uniform(-1.0, 1.0, number_of_weights)
    v_k = np.random.uniform(-1.0, 1.0, number_of_weights)

    # Unitary quaternion: one vectorised normalisation instead of a Python
    # loop over every weight.
    norm = np.sqrt(v_r**2 + v_i**2 + v_j**2 + v_k**2) + 0.0001
    return tuple((component / norm).reshape(kernel_shape)
                 for component in (v_r, v_i, v_j, v_k))
def random_init(in_features, out_features, rng, kernel_size=None, criterion='glorot'):
    """Draw four weight arrays uniformly in [-1, 1).

    Args:
        in_features, out_features: sizes used to build the kernel shape.
        rng: accepted for signature compatibility; sampling uses the global
            ``np.random`` state.
        kernel_size: ``None`` for a ``(in_features, out_features)`` matrix,
            otherwise an int or a sequence appended to
            ``(out_features, in_features)``.
        criterion: must be ``'glorot'`` or ``'he'``.

    Returns:
        Tuple ``(weight_r, weight_i, weight_j, weight_k)`` of arrays with the
        kernel shape.

    Raises:
        ValueError: if ``criterion`` is neither ``'glorot'`` nor ``'he'``.
    """
    # NOTE(review): the previous code derived a scale `s` from the criterion
    # and the fan-in/fan-out but never applied it to the weights.  That dead
    # computation is removed; only the criterion validation is kept.  Confirm
    # whether the weights were meant to be scaled.
    if criterion not in ('glorot', 'he'):
        raise ValueError('Invalid criterion: ' + criterion)

    if kernel_size is None:
        kernel_shape = (in_features, out_features)
    elif isinstance(kernel_size, int):
        kernel_shape = (out_features, in_features, kernel_size)
    else:
        kernel_shape = (out_features, in_features) + tuple(kernel_size)

    number_of_weights = np.prod(kernel_shape)
    # Drawn in r, i, j, k order so a seeded global RNG yields the same values
    # as before.
    weight_r = np.random.uniform(-1.0, 1.0, number_of_weights).reshape(kernel_shape)
    weight_i = np.random.uniform(-1.0, 1.0, number_of_weights).reshape(kernel_shape)
    weight_j = np.random.uniform(-1.0, 1.0, number_of_weights).reshape(kernel_shape)
    weight_k = np.random.uniform(-1.0, 1.0, number_of_weights).reshape(kernel_shape)
    return (weight_r, weight_i, weight_j, weight_k)
def quaternion_init(in_features, out_features, rng, kernel_size=None, criterion='glorot'):
    """Draw quaternion weights in polar form.

    A modulus is sampled from a chi distribution with 4 degrees of freedom
    scaled by ``s``, a phase is sampled uniformly in [-pi, pi), and a
    normalised purely imaginary quaternion gives the direction of the
    imaginary part.

    Args:
        in_features, out_features: sizes used for the fan-in/fan-out and the
            kernel shape.
        rng: accepted for signature compatibility; it is replaced by a fresh
            ``RandomState`` seeded from the global ``np.random`` state.
        kernel_size: ``None`` for a ``(in_features, out_features)`` matrix,
            otherwise an int or a sequence appended to
            ``(out_features, in_features)``.
        criterion: ``'glorot'`` uses ``s = 1/sqrt(2*(fan_in+fan_out))``,
            ``'he'`` uses ``s = 1/sqrt(2*fan_in)``.

    Returns:
        Tuple ``(weight_r, weight_i, weight_j, weight_k)`` of arrays with the
        kernel shape.

    Raises:
        ValueError: if ``criterion`` is neither ``'glorot'`` nor ``'he'``.
    """
    if kernel_size is not None:
        receptive_field = np.prod(kernel_size)
        fan_in = in_features * receptive_field
        fan_out = out_features * receptive_field
    else:
        fan_in = in_features
        fan_out = out_features

    if criterion == 'glorot':
        s = 1. / np.sqrt(2*(fan_in + fan_out))
    elif criterion == 'he':
        s = 1. / np.sqrt(2*fan_in)
    else:
        raise ValueError('Invalid criterion: ' + criterion)

    # The rng argument is deliberately overwritten, as in the original code.
    rng = RandomState(np.random.randint(1, 1234))

    # Generating randoms and purely imaginary quaternions :
    if kernel_size is None:
        kernel_shape = (in_features, out_features)
    elif isinstance(kernel_size, int):
        kernel_shape = (out_features, in_features, kernel_size)
    else:
        kernel_shape = (out_features, in_features) + tuple(kernel_size)

    modulus = chi.rvs(4, loc=0, scale=s, size=kernel_shape)
    number_of_weights = np.prod(kernel_shape)
    v_i = np.random.uniform(-1.0, 1.0, number_of_weights)
    v_j = np.random.uniform(-1.0, 1.0, number_of_weights)
    v_k = np.random.uniform(-1.0, 1.0, number_of_weights)

    # Purely imaginary quaternions unitary: one vectorised normalisation
    # instead of a Python loop over every weight.
    norm = np.sqrt(v_i**2 + v_j**2 + v_k**2 + 0.0001)
    v_i = (v_i / norm).reshape(kernel_shape)
    v_j = (v_j / norm).reshape(kernel_shape)
    v_k = (v_k / norm).reshape(kernel_shape)

    phase = rng.uniform(low=-np.pi, high=np.pi, size=kernel_shape)
    sin_phase = np.sin(phase)
    weight_r = modulus * np.cos(phase)
    weight_i = modulus * v_i*sin_phase
    weight_j = modulus * v_j*sin_phase
    weight_k = modulus * v_k*sin_phase
    return (weight_r, weight_i, weight_j, weight_k)
def create_dropout_mask(dropout_p, size, rng, as_type, operation='linear'):
    """Sample a 0/1 mask of ``size`` where each entry is 1 with probability ``1-dropout_p``.

    The mask is drawn with ``rng.binomial``, converted to a tensor cast to
    ``as_type`` and wrapped in a ``Variable``.

    Raises:
        Exception: if ``operation`` is anything other than ``'linear'``.
    """
    if operation != 'linear':
        raise Exception("create_dropout_mask accepts only 'linear'. Found operation = "
                        + str(operation))
    keep_probability = 1-dropout_p
    sampled = rng.binomial(n=1, p=keep_probability, size=size)
    mask_tensor = torch.from_numpy(sampled).type(as_type)
    return Variable(mask_tensor)
def affect_init(r_weight, i_weight, j_weight, k_weight, init_func, rng, init_criterion):
    """Fill four 2-D weight tensors in place with values produced by ``init_func``.

    ``init_func`` is called as
    ``init_func(rows, cols, rng, None, init_criterion)`` and must return four
    NumPy arrays; each is converted to a tensor, cast to the type of the
    matching weight and assigned to its ``.data``.

    Raises:
        ValueError: if the four weights do not all have the same size.
        Exception: if the weights are not 2-dimensional.
    """
    reference_size = r_weight.size()
    sizes_agree = all(
        other.size() == reference_size for other in (i_weight, j_weight, k_weight))
    if not sizes_agree:
        raise ValueError('The real and imaginary weights '
                         'should have the same size . Found: r:'
                         + str(r_weight.size()) + ' i:'
                         + str(i_weight.size()) + ' j:'
                         + str(j_weight.size()) + ' k:'
                         + str(k_weight.size()))
    if r_weight.dim() != 2:
        raise Exception('affect_init accepts only matrices. Found dimension = '
                        + str(r_weight.dim()))

    r, i, j, k = init_func(
        r_weight.size(0), r_weight.size(1), rng, None, init_criterion)
    # Convert everything first, then assign, so a failed conversion leaves
    # all four weights untouched.
    converted = [torch.from_numpy(values) for values in (r, i, j, k)]
    for target, source in zip((r_weight, i_weight, j_weight, k_weight), converted):
        target.data = source.type_as(target.data)
def affect_init_conv(r_weight, i_weight, j_weight, k_weight, kernel_size, init_func, rng,
                     init_criterion):
    """Fill four convolution weight tensors in place with values from ``init_func``.

    ``init_func`` is called as
    ``init_func(r_weight.size(1), r_weight.size(0), rng=rng,
    kernel_size=kernel_size, criterion=init_criterion)`` and must return four
    NumPy arrays; each is converted to a tensor, cast to the type of the
    matching weight and assigned to its ``.data``.

    Raises:
        ValueError: if the four weights do not all have the same size.
        Exception: if the weights have 2 or fewer dimensions.
    """
    if r_weight.size() != i_weight.size() or r_weight.size() != j_weight.size() or \
            r_weight.size() != k_weight.size():
        raise ValueError('The real and imaginary weights '
                         'should have the same size . Found: r:'
                         + str(r_weight.size()) + ' i:'
                         + str(i_weight.size()) + ' j:'
                         + str(j_weight.size()) + ' k:'
                         + str(k_weight.size()))
    if r_weight.dim() <= 2:
        # The message previously referenced an undefined name (real_weight),
        # so this path raised NameError instead of the intended error.
        raise Exception('affect_conv_init accepts only tensors that have more than 2 dimensions. Found dimension = '
                        + str(r_weight.dim()))

    r, i, j, k = init_func(
        r_weight.size(1),
        r_weight.size(0),
        rng=rng,
        kernel_size=kernel_size,
        criterion=init_criterion
    )
    r, i, j, k = torch.from_numpy(r), torch.from_numpy(
        i), torch.from_numpy(j), torch.from_numpy(k)
    r_weight.data = r.type_as(r_weight.data)
    i_weight.data = i.type_as(i_weight.data)
    j_weight.data = j.type_as(j_weight.data)
    k_weight.data = k.type_as(k_weight.data)
def get_kernel_and_weight_shape(operation, in_channels, out_channels, kernel_size):
    """Return the normalised kernel size and the weight shape for a convolution.

    Args:
        operation: ``'convolution1d'``, ``'convolution2d'`` or
            ``'convolution3d'``.  Any other value is accepted only together
            with a sequence ``kernel_size``, which is then used unchecked.
        in_channels, out_channels: channel counts placed in the weight shape.
        kernel_size: an int (required for 1d; repeated once per spatial
            dimension for 2d/3d) or a sequence whose length must match the
            dimensionality for 2d/3d.

    Returns:
        ``(ks, w_shape)`` where ``w_shape`` is
        ``(out_channels, in_channels, *ks)``.

    Raises:
        ValueError: if ``kernel_size`` does not fit ``operation``.
    """
    scalar_kernel = isinstance(kernel_size, int)

    if operation == 'convolution1d':
        if not scalar_kernel:
            raise ValueError(
                'An invalid kernel_size was supplied for a 1d convolution. The kernel size\n'
                + ' ' * 16 + 'must be integer in the case. Found kernel_size = '
                + str(kernel_size)
            )
        return kernel_size, (out_channels, in_channels, kernel_size)

    # Number of spatial dimensions for the operations that are checked.
    spatial_dims = {'convolution2d': 2, 'convolution3d': 3}.get(operation)

    if scalar_kernel:
        if spatial_dims is None:
            # Previously this fell through with `ks` unbound and crashed with
            # UnboundLocalError; report the real problem instead.
            raise ValueError(
                'An integer kernel_size can only be expanded for convolution2d or '
                'convolution3d. Found operation = ' + str(operation)
            )
        ks = (kernel_size,) * spatial_dims
    else:
        if spatial_dims is not None and len(kernel_size) != spatial_dims:
            raise ValueError(
                'An invalid kernel_size was supplied for a ' + str(spatial_dims)
                + 'd convolution. The kernel size\n'
                + ' ' * 20 + 'must be either an integer or a tuple of ' + str(spatial_dims)
                + '. Found kernel_size = ' + str(kernel_size)
            )
        ks = kernel_size

    w_shape = (out_channels, in_channels) + tuple(ks)
    return ks, w_shape

models/phc_models.py ADDED Viewed

	@@ -0,0 +1,365 @@

+'''ResNet in PyTorch.
+For Pre-activation ResNet, see 'preact_resnet.py'.
+Reference:
+[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+    Deep Residual Learning for Image Recognition. arXiv:1512.03385
+'''
+import sys
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from models.hypercomplex_layers import PHConv
+from utils.utils import load_weights
+sys.path.append('./models')
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 ``PHConv`` layers, each followed by batch norm.

    The input is added back through ``shortcut``: an empty ``nn.Sequential``
    when the shapes already match, otherwise a 1x1 ``PHConv`` plus batch norm.
    """
    # Ratio between the block's output channels and ``planes``.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, n=4):
        super().__init__()
        out_planes = self.expansion*planes

        self.conv1 = PHConv(n, in_planes, planes,
                            kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = PHConv(n, planes, planes,
                            kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride == 1 and in_planes == out_planes:
            # Shapes match: the shortcut passes the input through unchanged.
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                PHConv(n, in_planes, out_planes,
                       kernel_size=1, stride=stride,),
                nn.BatchNorm2d(out_planes)
            )

    def forward(self, x):
        hidden = self.conv1(x)
        hidden = F.relu(self.bn1(hidden))
        out = self.bn2(self.conv2(hidden))
        out += self.shortcut(x)
        return F.relu(out)
class Bottleneck(nn.Module):
    """Residual block with a 1x1, a 3x3 and a 1x1 ``PHConv``, each followed by batch norm.

    The last convolution outputs ``expansion * planes`` channels.  The input
    is added back through ``shortcut``: an empty ``nn.Sequential`` when the
    shapes already match, otherwise a 1x1 ``PHConv`` plus batch norm.
    """
    # Ratio between the block's output channels and ``planes``.
    expansion = 2

    def __init__(self, in_planes, planes, stride=1, n=4):
        super().__init__()
        out_planes = self.expansion*planes

        self.conv1 = PHConv(n, in_planes, planes, kernel_size=1, stride=1)
        self.bn1 = nn.BatchNorm2d(planes)
        # Only the 3x3 convolution carries the stride.
        self.conv2 = PHConv(n, planes, planes,
                            kernel_size=3, stride=stride, padding=1)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = PHConv(n, planes, out_planes, kernel_size=1, stride=1)
        self.bn3 = nn.BatchNorm2d(out_planes)

        if stride == 1 and in_planes == out_planes:
            # Shapes match: the shortcut passes the input through unchanged.
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                PHConv(n, in_planes, out_planes,
                       kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_planes)
            )

    def forward(self, x):
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = F.relu(self.bn2(self.conv2(hidden)))
        out = self.bn3(self.conv3(hidden))
        out += self.shortcut(x)
        return F.relu(out)
class PHCResNet(nn.Module):
    """PHCResNet.

    A ResNet-style network whose convolutions are ``PHConv`` layers: a stem
    convolution, four residual stages and, after ``add_top_blocks`` has been
    called, two extra "refiner" stages.

    Parameters:
    - block: residual block class used for the four stages
    - num_blocks: number of blocks in each of the four stages
    - channels: number of input channels of the stem convolution
    - n: value forwarded as first argument to every ``PHConv``
    - num_classes: output size of the final linear layer
    - before_gap_output: True to return the output before refiner blocks and gap
    - gap_output: True to return the output after gap and before final linear layer
    - visualize: True to also return the activation maps used for the output
    """

    def __init__(self, block, num_blocks, channels=4, n=4, num_classes=10, before_gap_output=False, gap_output=False, visualize=False):
        super().__init__()
        self.block = block
        self.num_blocks = num_blocks
        self.in_planes = 64
        self.n = n
        self.before_gap_out = before_gap_output
        self.gap_output = gap_output
        self.visualize = visualize
        self.conv1 = PHConv(n, channels, 64, kernel_size=3,
                            stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1, n=n)
        self.layer2 = self._make_layer(
            block, 128, num_blocks[1], stride=2, n=n)
        self.layer3 = self._make_layer(
            block, 256, num_blocks[2], stride=2, n=n)
        self.layer4 = self._make_layer(
            block, 512, num_blocks[3], stride=2, n=n)
        # Refiner blocks (created by add_top_blocks)
        self.layer5 = None
        self.layer6 = None
        # The classification head is only needed when logits are returned.
        if not before_gap_output and not gap_output:
            self.linear = nn.Linear(512*block.expansion, num_classes)

    def add_top_blocks(self, num_classes=1):
        """Append the two refiner stages and replace the linear head."""
        self.layer5 = self._make_layer(Bottleneck, 512, 2, stride=2, n=self.n)
        self.layer6 = self._make_layer(Bottleneck, 512, 2, stride=2, n=self.n)
        if not self.before_gap_out and not self.gap_output:
            self.linear = nn.Linear(1024, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride, n):
        """Build one stage; only its first block uses ``stride``."""
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride, n))
            # The next block consumes this block's output channels.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out4 = self.layer4(out)
        if self.before_gap_out:
            return out4
        # Without refiner blocks the pooling runs on layer4's output; the
        # previous code left out6 undefined in that case and crashed.
        out6 = out4
        if self.layer5 is not None:
            out5 = self.layer5(out4)
            out6 = self.layer6(out5)
        # global average pooling (GAP)
        n, c, _, _ = out6.size()
        out = out6.view(n, c, -1).mean(-1)
        if self.gap_output:
            return out
        out = self.linear(out)
        if self.visualize:
            # return the final output and activation maps at two different levels
            return out, out4, out6
        return out
class Encoder(nn.Module):
    """Encoder branch in PHYSBOnet.

    A stem ``PHConv`` with batch norm followed by two stages of two
    ``BasicBlock`` each (64 planes at stride 1, then 128 planes at stride 2).
    """

    def __init__(self, channels, n):
        super().__init__()
        # Channel count fed to the next block; updated by _make_layer.
        self.in_planes = 64
        self.conv1 = PHConv(n, channels, 64,
                            kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(BasicBlock, 64, 2, stride=1, n=n)
        self.layer2 = self._make_layer(BasicBlock, 128, 2, stride=2, n=n)

    def _make_layer(self, block, planes, num_blocks, stride, n):
        """Build one stage; only its first block uses ``stride``."""
        stage = []
        for block_stride in [stride] + [1]*(num_blocks-1):
            stage.append(block(self.in_planes, planes, block_stride, n))
            # The next block consumes this block's output channels.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        features = F.relu(self.bn1(self.conv1(x)))
        features = self.layer1(features)
        return self.layer2(features)
+class SharedBottleneck(nn.Module):
+    """SharedBottleneck in PHYSBOnet: stages 3-6 followed by global average pooling."""
+    def __init__(self, n, in_planes):
+        super().__init__()
+        self.in_planes = in_planes
+        # (attribute name, block type, planes); every stage has 2 blocks, stride 2.
+        stage_specs = (
+            ('layer3', BasicBlock, 256),
+            ('layer4', BasicBlock, 512),
+            ('layer5', Bottleneck, 512),
+            ('layer6', Bottleneck, 512),
+        )
+        for attr, block, planes in stage_specs:
+            setattr(self, attr, self._make_layer(block, planes, 2, stride=2, n=n))
+    def _make_layer(self, block, planes, num_blocks, stride, n):
+        """Stack blocks into a stage; only the first block uses ``stride``."""
+        stage = nn.Sequential()
+        for idx, block_stride in enumerate([stride] + [1] * (num_blocks - 1)):
+            stage.add_module(
+                str(idx), block(self.in_planes, planes, block_stride, n))
+            self.in_planes = planes * block.expansion
+        return stage
+    def forward(self, x):
+        """Return the spatially averaged ``layer6`` features, one row per sample."""
+        maps = x
+        for stage in (self.layer3, self.layer4, self.layer5, self.layer6):
+            maps = stage(maps)
+        batch, width, _, _ = maps.size()
+        return maps.view(batch, width, -1).mean(-1)
+class Classifier(nn.Module):
+    """Classifier branch in PHYSEnet: two refiner stages, GAP and a linear head."""
+    def __init__(self, n, num_classes, in_planes=512, visualize=False):
+        super().__init__()
+        self.in_planes = in_planes
+        self.visualize = visualize
+        # Refiner blocks
+        refiner = dict(block=Bottleneck, planes=512, num_blocks=2, stride=2, n=n)
+        self.layer5 = self._make_layer(**refiner)
+        self.layer6 = self._make_layer(**refiner)
+        self.linear = nn.Linear(1024, num_classes)
+    def _make_layer(self, block, planes, num_blocks, stride, n):
+        """Stack blocks into a stage; only the first block uses ``stride``."""
+        stage_blocks = []
+        for block_stride in [stride] + [1] * (num_blocks - 1):
+            stage_blocks.append(block(self.in_planes, planes, block_stride, n))
+            self.in_planes = planes * block.expansion
+        return nn.Sequential(*stage_blocks)
+    def forward(self, x):
+        """Return logits, plus the ``layer6`` maps when ``visualize`` is set."""
+        feature_maps = self.layer6(self.layer5(x))
+        batch, width, _, _ = feature_maps.size()
+        pooled = feature_maps.view(batch, width, -1).mean(-1)
+        logits = self.linear(pooled)
+        if not self.visualize:
+            return logits
+        return logits, feature_maps
+class PHYSBOnet(nn.Module):
+    """PHYSBOnet: two view encoders, one bottleneck trunk and one linear head per side.
+
+    Parameters:
+    - n: passed to the SharedBottleneck (the encoders are always built with n=2).
+    - shared: True to run the bottleneck on each side separately with the same
+      weights, False for the 'concat' version that fuses both sides first.
+    - num_classes: output size of each side's linear head.
+    - weights: path to pretrained weights of patch classifier for Encoder branches
+    """
+    def __init__(self, n, shared=True, num_classes=1, weights=None):
+        super().__init__()
+        self.shared = shared
+        self.encoder_sx = Encoder(channels=2, n=2)
+        self.encoder_dx = Encoder(channels=2, n=2)
+        # The concat variant feeds both encoders' channels into the trunk.
+        trunk_in_planes = 128 if shared else 256
+        self.shared_resnet = SharedBottleneck(n, in_planes=trunk_in_planes)
+        if weights:
+            for encoder in (self.encoder_sx, self.encoder_dx):
+                load_weights(encoder, weights)
+        self.classifier_sx = nn.Linear(1024, num_classes)
+        self.classifier_dx = nn.Linear(1024, num_classes)
+    def forward(self, x):
+        """Take an ``(x_sx, x_dx)`` pair; return both sides' outputs stacked on dim 0."""
+        x_sx, x_dx = x
+        # Apply Encoder
+        feat_sx = self.encoder_sx(x_sx)
+        feat_dx = self.encoder_dx(x_dx)
+        if not self.shared:
+            # Concat version: a single fused feature vector feeds both heads.
+            fused = torch.cat([feat_sx, feat_dx], dim=1)
+            pooled_sx = pooled_dx = self.shared_resnet(fused)
+        else:
+            # Shared layers: the same trunk is applied to each side in turn.
+            pooled_sx = self.shared_resnet(feat_sx)
+            pooled_dx = self.shared_resnet(feat_dx)
+        logits_sx = self.classifier_sx(pooled_sx)
+        logits_dx = self.classifier_dx(pooled_dx)
+        return torch.cat([logits_sx, logits_dx], dim=0)
+class PHYSEnet(nn.Module):
+    """PHYSEnet: one PHCResNet18 encoder applied to both sides, one Classifier per side.
+
+    Parameters:
+    - n: passed to the two Classifier branches (the encoder is always built with n=2).
+    - num_classes: output size of each Classifier.
+    - weights: path to pretrained weights of patch classifier for PHCResNet18 encoder or path to whole-image classifier
+    - patch_weights: True if the weights correspond to patch classifier, False if they are whole-image.
+                     In the latter case also Classifier branches will be initialized.
+    - visualize: when True, forward also returns encoder outputs and classifier feature maps.
+    """
+    def __init__(self, n=2, num_classes=1, weights=None, patch_weights=True, visualize=False):
+        super().__init__()
+        self.visualize = visualize
+        self.phcresnet18 = PHCResNet18(
+            n=2, num_classes=num_classes, channels=2, before_gap_output=True)
+        if weights:
+            print('Loading weights for phcresnet18 from ', weights)
+            load_weights(self.phcresnet18, weights)
+        self.classifier_sx = Classifier(n, num_classes, visualize=visualize)
+        self.classifier_dx = Classifier(n, num_classes, visualize=visualize)
+        whole_image_weights = weights and not patch_weights
+        if whole_image_weights:
+            print('Loading weights for classifiers from ', weights)
+            for branch in (self.classifier_sx, self.classifier_dx):
+                load_weights(branch, weights)
+    def forward(self, x):
+        """Take an ``(x_sx, x_dx)`` pair; return both sides' outputs stacked on dim 0.
+
+        With ``visualize`` set, also returns the two encoder outputs and the
+        two classifier feature maps.
+        """
+        x_sx, x_dx = x
+        # Apply Encoder
+        enc_sx = self.phcresnet18(x_sx)
+        enc_dx = self.phcresnet18(x_dx)
+        # Apply refiner blocks + classifier
+        res_sx = self.classifier_sx(enc_sx)
+        res_dx = self.classifier_dx(enc_dx)
+        if not self.visualize:
+            return torch.cat([res_sx, res_dx], dim=0)
+        # In visualize mode each classifier returns (logits, feature_maps).
+        (logits_sx, act_sx), (logits_dx, act_dx) = res_sx, res_dx
+        stacked = torch.cat([logits_sx, logits_dx], dim=0)
+        return stacked, enc_sx, enc_dx, act_sx, act_dx
+def PHCResNet18(channels=4, n=4, num_classes=10, before_gap_output=False, gap_output=False, visualize=False):
+    """Build a ``PHCResNet`` from ``BasicBlock`` with two blocks in each of the four stages."""
+    blocks_per_stage = [2, 2, 2, 2]
+    return PHCResNet(
+        BasicBlock,
+        blocks_per_stage,
+        channels=channels,
+        n=n,
+        num_classes=num_classes,
+        before_gap_output=before_gap_output,
+        gap_output=gap_output,
+        visualize=visualize,
+    )
+def PHCResNet50(channels=4, n=4, num_classes=10):
+    """Build a ``PHCResNet`` from ``Bottleneck`` with stage depths 3, 4, 6, 3."""
+    blocks_per_stage = [3, 4, 6, 3]
+    return PHCResNet(
+        Bottleneck,
+        blocks_per_stage,
+        channels=channels,
+        n=n,
+        num_classes=num_classes,
+    )

models/real_models.py ADDED Viewed

	@@ -0,0 +1,333 @@

+'''ResNet in PyTorch.
+For Pre-activation ResNet, see 'preact_resnet.py'.
+Reference:
+[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+    Deep Residual Learning for Image Recognition. arXiv:1512.03385
+'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from utils.utils import load_weights
+class BasicBlock(nn.Module):
+    """Residual block of two 3x3 conv/BN layers; output width is ``planes``."""
+    expansion = 1
+    def __init__(self, in_planes, planes, stride=1):
+        super().__init__()
+        out_planes = self.expansion * planes
+        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
+                               stride=stride, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                               stride=1, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+        # Identity shortcut unless the block changes resolution or width,
+        # in which case a strided 1x1 conv + BN projects the input.
+        if stride == 1 and in_planes == out_planes:
+            self.shortcut = nn.Sequential()
+        else:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, out_planes, kernel_size=1,
+                          stride=stride, bias=False),
+                nn.BatchNorm2d(out_planes),
+            )
+    def forward(self, x):
+        """Return ``relu(main_path(x) + shortcut(x))``."""
+        main = F.relu(self.bn1(self.conv1(x)))
+        main = self.bn2(self.conv2(main))
+        main += self.shortcut(x)
+        return F.relu(main)
+class Bottleneck(nn.Module):
+    """Residual 1x1 -> 3x3 -> 1x1 block; output width is ``expansion * planes``."""
+    expansion = 2
+    def __init__(self, in_planes, planes, stride=1):
+        super().__init__()
+        out_planes = self.expansion * planes
+        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        # Only the 3x3 convolution carries the stride.
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                               stride=stride, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(out_planes)
+        # Identity shortcut unless the block changes resolution or width,
+        # in which case a strided 1x1 conv + BN projects the input.
+        if stride == 1 and in_planes == out_planes:
+            self.shortcut = nn.Sequential()
+        else:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, out_planes, kernel_size=1,
+                          stride=stride, bias=False),
+                nn.BatchNorm2d(out_planes),
+            )
+    def forward(self, x):
+        """Return ``relu(main_path(x) + shortcut(x))``."""
+        main = F.relu(self.bn1(self.conv1(x)))
+        main = F.relu(self.bn2(self.conv2(main)))
+        main = self.bn3(self.conv3(main))
+        main += self.shortcut(x)
+        return F.relu(main)
+class ResNet(nn.Module):
+    """ResNet backbone with optional refiner stages and selectable output level.
+
+    Parameters:
+    - block: block class used for layer1-layer4 (must expose ``expansion``).
+    - num_blocks: number of blocks in each of the four stages.
+    - channels: number of input channels of the stem convolution.
+    - num_classes: output size of the linear head (when one is built).
+    - gap_output: return the globally averaged features instead of logits.
+    - before_gap_output: return the layer4 feature maps, skipping pooling.
+    - visualize: also return the layer4 maps and the last feature maps.
+    """
+    def __init__(self, block, num_blocks, channels=4, num_classes=10, gap_output=False, before_gap_output=False, visualize=False):
+        super().__init__()
+        self.block = block
+        self.num_blocks = num_blocks
+        self.in_planes = 64
+        self.gap_output = gap_output
+        self.before_gap_out = before_gap_output
+        self.visualize = visualize
+        self.conv1 = nn.Conv2d(channels, 64, kernel_size=3,
+                               stride=1, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
+        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
+        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
+        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
+        # Refiner stages are only created by add_top_blocks().
+        self.layer5 = None
+        self.layer6 = None
+        if not gap_output and not before_gap_output:
+            self.linear = nn.Linear(512*block.expansion, num_classes)
+    def add_top_blocks(self, num_classes=1):
+        """Append the two Bottleneck refiner stages and rebuild the linear head."""
+        self.layer5 = self._make_layer(Bottleneck, 512, 2, stride=2)
+        self.layer6 = self._make_layer(Bottleneck, 512, 2, stride=2)
+        if not self.gap_output and not self.before_gap_out:
+            self.linear = nn.Linear(1024, num_classes)
+    def _make_layer(self, block, planes, num_blocks, stride):
+        """Stack blocks into a stage; only the first block uses ``stride``."""
+        strides = [stride] + [1]*(num_blocks-1)
+        layers = []
+        for stride in strides:
+            layers.append(block(self.in_planes, planes, stride))
+            self.in_planes = planes * block.expansion
+        return nn.Sequential(*layers)
+    def forward(self, x):
+        """Return layer4 maps, pooled features or logits depending on the flags."""
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = self.layer1(out)
+        out = self.layer2(out)
+        out = self.layer3(out)
+        out4 = self.layer4(out)
+        if self.before_gap_out:
+            return out4
+        # Without add_top_blocks() there are no refiner stages: pool the
+        # layer4 maps directly (previously ``out6`` was left unbound and the
+        # call crashed).
+        out6 = out4
+        if self.layer5 is not None:
+            out5 = self.layer5(out4)
+            out6 = self.layer6(out5)
+        # global average pooling (GAP)
+        n, c, _, _ = out6.size()
+        out = out6.view(n, c, -1).mean(-1)
+        if self.gap_output:
+            return out
+        out = self.linear(out)
+        if self.visualize:
+            # logits plus activation maps at two different levels
+            return out, out4, out6
+        return out
+class Encoder(nn.Module):
+    """Per-view encoder: a 3x3 conv stem followed by two BasicBlock stages."""
+    def __init__(self, channels):
+        super().__init__()
+        stem_width = 64
+        self.in_planes = stem_width
+        self.conv1 = nn.Conv2d(channels, stem_width, kernel_size=3,
+                               stride=1, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(stem_width)
+        self.layer1 = self._make_layer(BasicBlock, 64, 2, stride=1)
+        self.layer2 = self._make_layer(BasicBlock, 128, 2, stride=2)
+    def _make_layer(self, block, planes, num_blocks, stride):
+        """Stack blocks into a stage; only the first block uses ``stride``."""
+        stage_blocks = [block(self.in_planes, planes, stride)]
+        self.in_planes = planes * block.expansion
+        for _ in range(num_blocks - 1):
+            stage_blocks.append(block(self.in_planes, planes, 1))
+        return nn.Sequential(*stage_blocks)
+    def forward(self, x):
+        """Return the ``layer2`` feature maps for input ``x``."""
+        stem = F.relu(self.bn1(self.conv1(x)))
+        return self.layer2(self.layer1(stem))
+class SharedBottleneck(nn.Module):
+    """Trunk made of stages 3-6 followed by global average pooling."""
+    def __init__(self, in_planes):
+        super().__init__()
+        self.in_planes = in_planes
+        # (attribute name, block type, planes); every stage has 2 blocks, stride 2.
+        stage_specs = (
+            ('layer3', BasicBlock, 256),
+            ('layer4', BasicBlock, 512),
+            ('layer5', Bottleneck, 512),
+            ('layer6', Bottleneck, 512),
+        )
+        for attr, block, planes in stage_specs:
+            setattr(self, attr, self._make_layer(block, planes, 2, stride=2))
+    def _make_layer(self, block, planes, num_blocks, stride):
+        """Stack blocks into a stage; only the first block uses ``stride``."""
+        stage = nn.Sequential()
+        for idx, block_stride in enumerate([stride] + [1] * (num_blocks - 1)):
+            stage.add_module(str(idx), block(self.in_planes, planes, block_stride))
+            self.in_planes = planes * block.expansion
+        return stage
+    def forward(self, x):
+        """Return the spatially averaged ``layer6`` features, one row per sample."""
+        maps = x
+        for stage in (self.layer3, self.layer4, self.layer5, self.layer6):
+            maps = stage(maps)
+        batch, width, _, _ = maps.size()
+        return maps.view(batch, width, -1).mean(-1)
+class Classifier(nn.Module):
+    """Classifier branch: two Bottleneck refiner stages, GAP and a linear head."""
+    def __init__(self, num_classes, in_planes=512, visualize=False):
+        super().__init__()
+        self.in_planes = in_planes
+        self.visualize = visualize
+        # Refiner blocks
+        refiner = dict(block=Bottleneck, planes=512, num_blocks=2, stride=2)
+        self.layer5 = self._make_layer(**refiner)
+        self.layer6 = self._make_layer(**refiner)
+        self.linear = nn.Linear(1024, num_classes)
+    def _make_layer(self, block, planes, num_blocks, stride):
+        """Stack blocks into a stage; only the first block uses ``stride``."""
+        stage_blocks = []
+        for block_stride in [stride] + [1] * (num_blocks - 1):
+            stage_blocks.append(block(self.in_planes, planes, block_stride))
+            self.in_planes = planes * block.expansion
+        return nn.Sequential(*stage_blocks)
+    def forward(self, x):
+        """Return logits, plus the ``layer6`` maps when ``visualize`` is set."""
+        feature_maps = self.layer6(self.layer5(x))
+        batch, width, _, _ = feature_maps.size()
+        pooled = feature_maps.view(batch, width, -1).mean(-1)
+        logits = self.linear(pooled)
+        if not self.visualize:
+            return logits
+        return logits, feature_maps
+class SBOnet(nn.Module):
+    """SBOnet: two view encoders, one bottleneck trunk and one linear head per side.
+
+    Parameters:
+    - shared: True to run the bottleneck on each side separately with the same
+      weights, False for the 'concat' version that fuses both sides first.
+    - num_classes: output size of each side's linear head.
+    - weights: path to pretrained weights of patch classifier for Encoder branches
+    """
+    def __init__(self, shared=True, num_classes=1, weights=None):
+        super().__init__()
+        self.shared = shared
+        self.encoder_sx = Encoder(channels=2)
+        self.encoder_dx = Encoder(channels=2)
+        # The concat variant feeds both encoders' channels into the trunk.
+        trunk_in_planes = 128 if shared else 256
+        self.shared_resnet = SharedBottleneck(in_planes=trunk_in_planes)
+        if weights:
+            for encoder in (self.encoder_sx, self.encoder_dx):
+                load_weights(encoder, weights)
+        self.classifier_sx = nn.Linear(1024, num_classes)
+        self.classifier_dx = nn.Linear(1024, num_classes)
+    def forward(self, x):
+        """Take an ``(x_sx, x_dx)`` pair; return both sides' outputs stacked on dim 0."""
+        x_sx, x_dx = x
+        # Apply Encoder
+        feat_sx = self.encoder_sx(x_sx)
+        feat_dx = self.encoder_dx(x_dx)
+        if not self.shared:
+            # Concat version: a single fused feature vector feeds both heads.
+            fused = torch.cat([feat_sx, feat_dx], dim=1)
+            pooled_sx = pooled_dx = self.shared_resnet(fused)
+        else:
+            # Shared layers: the same trunk is applied to each side in turn.
+            pooled_sx = self.shared_resnet(feat_sx)
+            pooled_dx = self.shared_resnet(feat_dx)
+        logits_sx = self.classifier_sx(pooled_sx)
+        logits_dx = self.classifier_dx(pooled_dx)
+        return torch.cat([logits_sx, logits_dx], dim=0)
+class SEnet(nn.Module):
+    """SEnet: one ResNet18 encoder applied to both sides, one Classifier per side.
+
+    Parameters:
+    - num_classes: output size of each Classifier.
+    - weights: path to pretrained weights of patch classifier for ResNet18 encoder or path to whole-image classifier
+    - patch_weights: True if the weights correspond to patch classifier, False if they are whole-image.
+                     In the latter case also Classifier branches will be initialized.
+    - visualize: when True, forward also returns encoder outputs and classifier feature maps.
+    """
+    def __init__(self, num_classes=1, weights=None, patch_weights=True, visualize=False):
+        super().__init__()
+        self.visualize = visualize
+        self.resnet18 = ResNet18(
+            num_classes=num_classes, channels=2, before_gap_output=True)
+        if weights:
+            print('Loading weights for resnet18 from ', weights)
+            load_weights(self.resnet18, weights)
+        self.classifier_sx = Classifier(num_classes, visualize=visualize)
+        self.classifier_dx = Classifier(num_classes, visualize=visualize)
+        whole_image_weights = weights and not patch_weights
+        if whole_image_weights:
+            print('Loading weights for classifiers from ', weights)
+            for branch in (self.classifier_sx, self.classifier_dx):
+                load_weights(branch, weights)
+    def forward(self, x):
+        """Take an ``(x_sx, x_dx)`` pair; return both sides' outputs stacked on dim 0.
+
+        With ``visualize`` set, also returns the two encoder outputs and the
+        two classifier feature maps.
+        """
+        x_sx, x_dx = x
+        # Apply Encoder
+        enc_sx = self.resnet18(x_sx)
+        enc_dx = self.resnet18(x_dx)
+        # Apply refiner blocks + classifier
+        res_sx = self.classifier_sx(enc_sx)
+        res_dx = self.classifier_dx(enc_dx)
+        if not self.visualize:
+            return torch.cat([res_sx, res_dx], dim=0)
+        # In visualize mode each classifier returns (logits, feature_maps).
+        (logits_sx, act_sx), (logits_dx, act_dx) = res_sx, res_dx
+        stacked = torch.cat([logits_sx, logits_dx], dim=0)
+        return stacked, enc_sx, enc_dx, act_sx, act_dx
+def ResNet18(num_classes=10, channels=4, gap_output=False, before_gap_output=False, visualize=False):
+    """Build a ``ResNet`` from ``BasicBlock`` with two blocks in each of the four stages."""
+    blocks_per_stage = [2, 2, 2, 2]
+    return ResNet(
+        BasicBlock,
+        blocks_per_stage,
+        num_classes=num_classes,
+        channels=channels,
+        gap_output=gap_output,
+        before_gap_output=before_gap_output,
+        visualize=visualize,
+    )
+def ResNet50(num_classes=10, channels=4):
+    """Build a ``ResNet`` from ``Bottleneck`` with stage depths 3, 4, 6, 3."""
+    blocks_per_stage = [3, 4, 6, 3]
+    return ResNet(
+        Bottleneck,
+        blocks_per_stage,
+        num_classes=num_classes,
+        channels=channels,
+    )

utils/__init__.py ADDED Viewed

File without changes

utils/utils.py ADDED Viewed

	@@ -0,0 +1,17 @@

+import torch
+def mean_activations(tensor):
+    """Average a detached CPU copy of ``tensor`` over dim 1 and squeeze dim 0."""
+    on_cpu = tensor.detach().cpu()
+    averaged = on_cpu.mean(dim=1)
+    # squeeze to remove batch dimension (only has an effect when it has size 1)
+    return averaged.squeeze(dim=0)
+def load_weights(model, weights):
+    """Load from the checkpoint at ``weights`` only the entries whose keys exist in ``model``.
+
+    The checkpoint is read onto the CPU; keys the model does not have are
+    ignored, and entries the checkpoint lacks keep the model's current values.
+    """
+    checkpoint = torch.load(weights, map_location='cpu')
+    target_state = model.state_dict()
+    for key, value in checkpoint.items():
+        if key in target_state:
+            target_state[key] = value
+    model.load_state_dict(target_state)