AlexK-PL committed
Commit 35c22ce · 1 Parent(s): 4e48a9d

Delete nn_layers.py

Files changed (1)
  1. nn_layers.py +0 -105
nn_layers.py DELETED
@@ -1,105 +0,0 @@
-import torch
-from torch import nn
-from librosa.filters import mel as librosa_mel_fn
-from stft import STFT
-
-clip_val = 1e-5
-C = 1
-
-
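For reference, these two module-level constants drive the log dynamic-range compression applied in TacotronSTFT.mel_spectrogram below and inverted by spectral_de_normalize. A minimal, self-contained sketch of that round trip (the random input is only a stand-in for real mel magnitudes):

import torch

clip_val = 1e-5  # floor that keeps log() away from -inf
C = 1            # compression factor (a no-op at 1, kept for symmetry)

x = torch.rand(4)                                   # stand-in for mel magnitudes
compressed = torch.log(torch.clamp(x, min=clip_val) * C)
recovered = torch.exp(compressed) / C               # what spectral_de_normalize does
assert torch.allclose(recovered, torch.clamp(x, min=clip_val))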
-class convolutional_module(nn.Module):
-    """This class defines a 1d convolutional layer and its initialization for the system we are
-    replicating."""
-    def __init__(self, in_ch, out_ch, kernel_size=1, stride=1, padding=None, dilation=1, bias=True,
-                 w_init_gain='linear'):
-        # in PyTorch you define your models as subclasses of torch.nn.Module
-        super(convolutional_module, self).__init__()
-        if padding is None:
-            assert kernel_size % 2 == 1
-            padding = int(dilation * (kernel_size - 1) / 2)
-
-        # initialize the convolutional layer, an instance of Conv1d.
-        # torch.nn.Conv1d internally calls torch.nn.functional.conv1d, which accepts an
-        # input of shape (minibatch x in_channels x input_w) and a weight of shape
-        # (out_channels x (in_channels / groups) x kernel_w). In our case we do not split
-        # into groups, so the input shape is (48 x 512 x 189) and the weights are set up
-        # as (512 x 512 x 5).
-        self.conv_layer = torch.nn.Conv1d(in_ch, out_ch, kernel_size=kernel_size, stride=stride,
-                                          padding=padding, dilation=dilation, bias=bias)
-
-        """Useful information on Xavier initialization:
-        https://prateekvjoshi.com/2016/03/29/understanding-xavier-initialization-in-deep-neural-networks/"""
-        torch.nn.init.xavier_uniform_(self.conv_layer.weight, gain=torch.nn.init.calculate_gain(w_init_gain))
-
-    def forward(self, x):
-        conv_output = self.conv_layer(x)
-        return conv_output
-
-
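The layer above amounts to a Conv1d with "same" padding plus Xavier-initialized weights. A short sketch in plain torch, using the batch/channel/width values from the shape comment in the code (illustrative only), showing that the padding rule preserves the time dimension:

import torch

# same padding rule as above: dilation * (kernel_size - 1) / 2 = 2 for kernel_size=5
conv = torch.nn.Conv1d(512, 512, kernel_size=5, padding=2)
torch.nn.init.xavier_uniform_(conv.weight, gain=torch.nn.init.calculate_gain('linear'))

x = torch.randn(48, 512, 189)  # (minibatch, in_channels, input_w)
print(conv(x).shape)           # torch.Size([48, 512, 189]) -- width is preserved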
-class linear_module(torch.nn.Module):
-    """This class defines a linear layer and its initialization method for the system we are
-    replicating. This implements a linear transformation: y = xA^T + b"""
-    def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'):
-        super(linear_module, self).__init__()
-        self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias)
-
-        torch.nn.init.xavier_uniform_(self.linear_layer.weight, gain=torch.nn.init.calculate_gain(w_init_gain))
-
-    def forward(self, x):
-        return self.linear_layer(x)
-
-
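The transformation named in the docstring, y = xA^T + b, is exactly what torch.nn.Linear computes; a small self-contained check with arbitrary illustrative dimensions:

import torch

linear = torch.nn.Linear(3, 2)
torch.nn.init.xavier_uniform_(linear.weight, gain=torch.nn.init.calculate_gain('tanh'))

x = torch.randn(5, 3)
y_manual = x @ linear.weight.T + linear.bias  # y = xA^T + b, computed by hand
assert torch.allclose(linear(x), y_manual)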
-class location_layer(nn.Module):
-    def __init__(self, attention_n_filters, attention_kernel_size, attention_dim):
-        super(location_layer, self).__init__()
-        padding = int((attention_kernel_size - 1) / 2)
-        """We are being quite restrictive by not training a bias."""
-        """in_channels = 2 corresponds to the two alignment vectors (previous and
-        cumulative attention weights) stacked for every encoder position."""
-        self.location_conv = convolutional_module(2, attention_n_filters, kernel_size=attention_kernel_size,
-                                                  padding=padding, bias=False, stride=1, dilation=1)
-        self.location_dense = linear_module(attention_n_filters, attention_dim, bias=False,
-                                            w_init_gain='tanh')
-
-    def forward(self, attention_weights_cat):
-        processed_attention = self.location_conv(attention_weights_cat)
-        processed_attention = processed_attention.transpose(1, 2)
-        processed_attention = self.location_dense(processed_attention)
-        return processed_attention
-
-
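Shape flow through location_layer: the stacked alignments (B, 2, T) pass through the convolution to (B, n_filters, T), are transposed to (B, T, n_filters), and projected to (B, T, attention_dim). A self-contained sketch with plain torch layers; the hyperparameter values are typical Tacotron 2 defaults and are assumptions here, not taken from this repository:

import torch

B, T = 4, 189                         # illustrative batch size and encoder length
n_filters, attn_dim, k = 32, 128, 31  # assumed Tacotron 2-style defaults

conv = torch.nn.Conv1d(2, n_filters, kernel_size=k, padding=(k - 1) // 2, bias=False)
dense = torch.nn.Linear(n_filters, attn_dim, bias=False)

attention_weights_cat = torch.randn(B, 2, T)  # previous + cumulative alignments
h = conv(attention_weights_cat)               # (B, n_filters, T)
h = h.transpose(1, 2)                         # (B, T, n_filters)
h = dense(h)                                  # (B, T, attn_dim)
print(h.shape)                                # torch.Size([4, 189, 128])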
-class TacotronSTFT(nn.Module):
-    def __init__(self, filter_length=1024, hop_length=256, win_length=1024,
-                 n_mel_channels=80, sampling_rate=22050, mel_fmin=0.0,
-                 mel_fmax=8000.0):
-        super(TacotronSTFT, self).__init__()
-        self.n_mel_channels = n_mel_channels
-        self.sampling_rate = sampling_rate
-        self.stft_fn = STFT(filter_length, hop_length, win_length)
-        mel_basis = librosa_mel_fn(
-            sampling_rate, filter_length, n_mel_channels, mel_fmin, mel_fmax)
-        mel_basis = torch.from_numpy(mel_basis).float()
-        self.register_buffer('mel_basis', mel_basis)
-
-    def spectral_de_normalize(self, magnitudes):
-        output = torch.exp(magnitudes) / C
-        return output
-
-    def mel_spectrogram(self, y):
-        """Computes mel-spectrograms from a batch of waves
-        PARAMS
-        ------
-        y: Variable(torch.FloatTensor) with shape (B, T) in range [-1, 1]
-
-        RETURNS
-        -------
-        mel_output: torch.FloatTensor of shape (B, n_mel_channels, T)
-        """
-        assert torch.min(y.data) >= -1
-        assert torch.max(y.data) <= 1
-
-        magnitudes, phases = self.stft_fn.transform(y)
-        magnitudes = magnitudes.data
-        mel_output = torch.matmul(self.mel_basis, magnitudes)
-        mel_output = torch.log(torch.clamp(mel_output, min=clip_val) * C)
-        return mel_output
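A sketch of the core of mel_spectrogram, with a random magnitude spectrogram standing in for stft_fn.transform(y) since the repository's stft module is not reproduced here. One caveat: the positional librosa_mel_fn call above only works on older librosa; since librosa 0.10 the filter-bank arguments are keyword-only, as used below:

import torch
from librosa.filters import mel as librosa_mel_fn

mel_basis = torch.from_numpy(
    librosa_mel_fn(sr=22050, n_fft=1024, n_mels=80, fmin=0.0, fmax=8000.0)
).float()                                    # (n_mels, n_fft // 2 + 1) = (80, 513)

magnitudes = torch.rand(2, 513, 100)         # stand-in for |STFT|: (B, n_fft // 2 + 1, frames)
mel = torch.matmul(mel_basis, magnitudes)    # broadcasts to (2, 80, 100)
mel = torch.log(torch.clamp(mel, min=1e-5))  # dynamic range compression, as above
print(mel.shape)                             # torch.Size([2, 80, 100])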