Spaces:
Sleeping
Sleeping
| import math | |
| from typing import Optional | |
| import torch | |
| from torch import nn | |
| from .vits_config import VitsConfig | |
| from .flow import VitsWaveNet | |
| #............................................. | |
class VitsPosteriorEncoder(nn.Module):
    """Posterior encoder: projects spectrogram frames to a sampled latent.

    The pipeline is a 1x1 convolution into ``config.hidden_size`` channels, a
    ``VitsWaveNet`` stack, and a 1x1 projection to ``2 * config.flow_size``
    channels that are split into a mean and a log standard deviation.
    """

    def __init__(self, config: "VitsConfig"):
        super().__init__()
        self.out_channels = config.flow_size

        self.conv_pre = nn.Conv1d(config.spectrogram_bins, config.hidden_size, 1)
        self.wavenet = VitsWaveNet(config, num_layers=config.posterior_encoder_num_wavenet_layers)
        # Twice the latent width: one half is the mean, the other the log-stddev.
        self.conv_proj = nn.Conv1d(config.hidden_size, self.out_channels * 2, 1)

    def forward(self, inputs, padding_mask, global_conditioning=None):
        """Encode ``inputs`` and draw a reparameterised sample.

        Args:
            inputs: Tensor fed to ``conv_pre``; its channel dimension (dim 1)
                must have ``config.spectrogram_bins`` entries.
            padding_mask: Tensor multiplied element-wise with every
                intermediate result (presumably 1 for valid frames and 0 for
                padding -- confirm against the caller).
            global_conditioning: Optional conditioning passed through to the
                WaveNet unchanged.

        Returns:
            Tuple ``(sampled, mean, log_stddev)`` where
            ``sampled = (mean + noise * exp(log_stddev)) * padding_mask`` and
            ``noise`` is standard normal.
        """
        inputs = self.conv_pre(inputs) * padding_mask
        inputs = self.wavenet(inputs, padding_mask, global_conditioning)
        stats = self.conv_proj(inputs) * padding_mask
        # conv_proj emits 2 * out_channels channels, so this yields two chunks.
        mean, log_stddev = torch.split(stats, self.out_channels, dim=1)
        sampled = (mean + torch.randn_like(mean) * torch.exp(log_stddev)) * padding_mask
        return sampled, mean, log_stddev

    def apply_weight_norm(self):
        """Delegate to the WaveNet's ``apply_weight_norm``."""
        self.wavenet.apply_weight_norm()

    def remove_weight_norm(self):
        """Delegate to the WaveNet's ``remove_weight_norm``."""
        self.wavenet.remove_weight_norm()

    def resize_speaker_embeddings(self, speaker_embedding_size: Optional[int] = None):
        """Replace the WaveNet conditioning layer for a new speaker-embedding width.

        A fresh, weight-normalised 1x1 ``Conv1d`` mapping
        ``speaker_embedding_size`` channels to
        ``2 * hidden_size * num_layers`` channels is created and stored as
        ``self.wavenet.cond_layer``; the previous layer (if any) is discarded.

        Args:
            speaker_embedding_size: Number of input channels of the new
                conditioning layer. Must be a positive integer.

        Raises:
            ValueError: If ``speaker_embedding_size`` is ``None`` or not
                positive. The module is left unmodified in that case.
        """
        # Validate before touching any state so a bad call cannot leave the
        # WaveNet with a size that does not match its conditioning layer.
        if speaker_embedding_size is None or speaker_embedding_size <= 0:
            raise ValueError(
                f"speaker_embedding_size must be a positive integer, got {speaker_embedding_size!r}"
            )

        hidden_size = self.wavenet.hidden_size
        num_layers = self.wavenet.num_layers
        cond_layer = nn.Conv1d(speaker_embedding_size, 2 * hidden_size * num_layers, 1)

        # Initialise the plain convolution BEFORE wrapping it in weight norm.
        # After `weight_norm`, `.weight` is recomputed from `weight_g` and
        # `weight_v` before every forward pass, so initialising it afterwards
        # would be silently discarded.
        nn.init.kaiming_normal_(cond_layer.weight)
        if cond_layer.bias is not None:
            bound = math.sqrt(cond_layer.groups / (cond_layer.in_channels * cond_layer.kernel_size[0]))
            nn.init.uniform_(cond_layer.bias, a=-bound, b=bound)

        self.wavenet.cond_layer = nn.utils.weight_norm(cond_layer, name="weight")
        self.wavenet.speaker_embedding_size = speaker_embedding_size
| #............................................................................................. |