Spaces:
Runtime error
Runtime error
| import torch | |
| import torch.nn.functional as F | |
| from torch import nn | |
| from torch.nn.utils import spectral_norm | |
| from torch.nn.utils.parametrizations import weight_norm | |
| from TTS.utils.audio.torch_transforms import TorchSTFT | |
| from TTS.vocoder.models.hifigan_discriminator import MultiPeriodDiscriminator | |
# Negative slope handed to F.leaky_relu after every convolution in
# SpecDiscriminator.forward.
LRELU_SLOPE = 0.1
class SpecDiscriminator(nn.Module):
    """2-D convolutional discriminator applied to an STFT of the input.

    The input is transformed with ``TorchSTFT`` and then passed through five
    normalized ``Conv2d`` layers (each followed by a leaky ReLU) and one final
    normalized ``Conv2d`` that reduces the channels to one.

    Args:
        fft_size: First argument forwarded to ``TorchSTFT``.
        hop_length: Second argument forwarded to ``TorchSTFT``.
        win_length: Third argument forwarded to ``TorchSTFT``.
        use_spectral_norm: Only the literal ``False`` selects ``weight_norm``;
            every other value selects ``spectral_norm``.
    """

    def __init__(self, fft_size=1024, hop_length=120, win_length=600, use_spectral_norm=False):
        super().__init__()
        # Identity check on purpose: anything that is not exactly ``False``
        # (including falsy values such as ``0`` or ``None``) picks spectral norm.
        if use_spectral_norm is False:
            wrap = weight_norm
        else:
            wrap = spectral_norm

        self.fft_size = fft_size
        self.hop_length = hop_length
        self.win_length = win_length
        self.stft = TorchSTFT(fft_size, hop_length, win_length)

        # Layers are created in the same order as they are applied.
        wide = {"kernel_size": (3, 9), "padding": (1, 4)}
        wide_strided = {"kernel_size": (3, 9), "stride": (1, 2), "padding": (1, 4)}
        layers = [wrap(nn.Conv2d(1, 32, **wide))]
        for _ in range(3):
            layers.append(wrap(nn.Conv2d(32, 32, **wide_strided)))
        layers.append(wrap(nn.Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))))
        self.discriminators = nn.ModuleList(layers)

        self.out = wrap(nn.Conv2d(32, 1, 3, 1, 1))

    def forward(self, y):
        """Score the input and collect the intermediate feature maps.

        Args:
            y: Input tensor; dimension 1 is squeezed before the STFT
                (presumably a ``(batch, 1, samples)`` waveform - confirm
                against callers).

        Returns:
            A tuple ``(score, fmap)`` where ``score`` is the final conv output
            flattened from dimension 1 onward and ``fmap`` is a list holding
            the activation after each of the five conv layers followed by the
            unflattened final output.
        """
        # The STFT runs without gradient tracking, so nothing upstream of it
        # receives gradients through this discriminator.
        # NOTE(review): confirm this is intended for the generator update.
        with torch.no_grad():
            spec = self.stft(y.squeeze(1))
        # Add a singleton channel axis for the Conv2d stack.
        hidden = spec.unsqueeze(1)

        feature_maps = []
        for conv in self.discriminators:
            hidden = F.leaky_relu(conv(hidden), LRELU_SLOPE)
            feature_maps.append(hidden)

        hidden = self.out(hidden)
        feature_maps.append(hidden)
        return torch.flatten(hidden, 1, -1), feature_maps
class MultiResSpecDiscriminator(torch.nn.Module):
    """Bank of ``SpecDiscriminator`` modules, one per STFT resolution.

    Args:
        fft_sizes: FFT size for each resolution.
        hop_sizes: Hop length for each resolution.
        win_lengths: Window length for each resolution.
        window: Kept for backward compatibility. It is not forwarded to the
            STFT (``SpecDiscriminator`` has no window parameter); see the note
            in ``__init__`` for the effect it has.

    Raises:
        ValueError: If the three per-resolution sequences differ in length.
    """

    def __init__(  # pylint: disable=dangerous-default-value
        self, fft_sizes=[1024, 2048, 512], hop_sizes=[120, 240, 50], win_lengths=[600, 1200, 240], window="hann_window"
    ):
        super().__init__()
        # The default lists are only read, never mutated.
        if not len(fft_sizes) == len(hop_sizes) == len(win_lengths):
            raise ValueError(
                "fft_sizes, hop_sizes and win_lengths must have the same length, "
                f"got {len(fft_sizes)}, {len(hop_sizes)} and {len(win_lengths)}"
            )

        # Historically ``window`` was passed positionally into the fourth
        # parameter of SpecDiscriminator, which is ``use_spectral_norm``. With
        # the string default that selects spectral norm (only the literal
        # ``False`` selects weight norm). The same effective setting is kept,
        # now passed explicitly by keyword, because changing the normalization
        # changes the parameter layout of the sub-modules.
        # NOTE(review): weight norm was presumably the intent - confirm before
        # switching, as it would affect previously saved discriminator weights.
        use_spectral_norm = window is not False

        # One discriminator per (fft, hop, win) triple instead of exactly three.
        self.discriminators = nn.ModuleList(
            [
                SpecDiscriminator(fft_size, hop_size, win_length, use_spectral_norm=use_spectral_norm)
                for fft_size, hop_size, win_length in zip(fft_sizes, hop_sizes, win_lengths)
            ]
        )

    def forward(self, x):
        """Run every sub-discriminator on ``x``.

        Args:
            x: Input passed unchanged to each sub-discriminator.

        Returns:
            A tuple ``(scores, feats)`` of two lists with one entry per
            sub-discriminator: its score and its list of feature maps.
        """
        scores = []
        feats = []
        for d in self.discriminators:
            score, feat = d(x)
            scores.append(score)
            feats.append(feat)
        return scores, feats
class UnivnetDiscriminator(nn.Module):
    """Univnet discriminator combining an MPD and an MSD.

    Holds a ``MultiPeriodDiscriminator`` (``mpd``) and a
    ``MultiResSpecDiscriminator`` (``msd``), both built with their default
    arguments, and concatenates their outputs.
    """

    def __init__(self):
        super().__init__()
        self.mpd = MultiPeriodDiscriminator()
        self.msd = MultiResSpecDiscriminator()

    def forward(self, x):
        """Run both discriminator groups on the same input.

        Args:
            x (Tensor): input waveform.

        Returns:
            List[Tensor]: scores from ``mpd`` followed by scores from ``msd``.
            List[List[Tensor]]: per-discriminator feature lists, ``mpd`` first,
                then ``msd``.
        """
        period_scores, period_feats = self.mpd(x)
        spec_scores, spec_feats = self.msd(x)
        all_scores = period_scores + spec_scores
        all_feats = period_feats + spec_feats
        return all_scores, all_feats