Spaces:
Paused
Paused
| import torch | |
| from TTS.vocoder.layers.pqmf import PQMF | |
| from TTS.vocoder.models.melgan_generator import MelganGenerator | |
class MultibandMelganGenerator(MelganGenerator):
    """``MelganGenerator`` variant whose output is merged by a PQMF filter bank.

    The network is the parent ``MelganGenerator`` built with the arguments
    given here. This subclass adds a ``PQMF`` layer, exposed through
    ``pqmf_analysis`` and ``pqmf_synthesis``, and an ``inference`` method that
    passes the network output through PQMF synthesis.
    """

    def __init__(
        self,
        in_channels=80,
        out_channels=4,
        proj_kernel=7,
        base_channels=384,
        upsample_factors=(2, 8, 2, 2),
        res_kernel=3,
        num_res_blocks=3,
    ):
        """Build the parent generator and attach the PQMF layer.

        All arguments are forwarded unchanged, by keyword, to
        ``MelganGenerator.__init__``; see that class for their meaning.
        """
        super().__init__(
            in_channels=in_channels,
            out_channels=out_channels,
            proj_kernel=proj_kernel,
            base_channels=base_channels,
            upsample_factors=upsample_factors,
            res_kernel=res_kernel,
            num_res_blocks=num_res_blocks,
        )
        # NOTE(review): N is fixed at 4 while ``out_channels`` is configurable.
        # Presumably N is the number of sub-bands and must equal
        # ``out_channels`` for ``inference`` to work -- confirm against PQMF
        # before using a non-default ``out_channels``.
        self.pqmf_layer = PQMF(N=4, taps=62, cutoff=0.15, beta=9.0)

    def pqmf_analysis(self, x):
        """Return ``self.pqmf_layer.analysis(x)``."""
        return self.pqmf_layer.analysis(x)

    def pqmf_synthesis(self, x):
        """Return ``self.pqmf_layer.synthesis(x)``."""
        return self.pqmf_layer.synthesis(x)

    @torch.no_grad()
    def inference(self, cond_features):
        """Run the generator on ``cond_features`` and merge the result with PQMF.

        Gradient tracking is disabled for the whole call, so no autograd
        graph is recorded and the returned tensor does not require grad.

        Args:
            cond_features: conditioning tensor fed to ``self.layers``
                (presumably a batch of spectrogram frames with time on the
                last axis -- TODO confirm against ``MelganGenerator``).

        Returns:
            The output of ``pqmf_synthesis`` applied to
            ``self.layers(cond_features)``.
        """
        # Follow the model's device; ``self.layers`` and
        # ``self.inference_padding`` are expected to come from MelganGenerator.
        cond_features = cond_features.to(self.layers[1].weight.device)
        # Extend the last dimension on both sides by repeating the edge values.
        cond_features = torch.nn.functional.pad(
            cond_features, (self.inference_padding, self.inference_padding), "replicate"
        )
        return self.pqmf_synthesis(self.layers(cond_features))