github-actions[bot]
Sync from https://github.com/JacobLinCool/zero-rvc
2d9b22b
import os
import torch
import torch.nn as nn
import numpy as np
import librosa
from .stft import STFT, TorchSTFT
USING_TORCH_STFT = os.getenv("USING_TORCH_STFT") is not None
class MelSpectrogram(nn.Module):
def __init__(
self,
n_mel_channels: int,
sampling_rate: int,
win_length: int,
hop_length: int,
n_fft: int = None,
mel_fmin: int = 0,
mel_fmax: int = None,
clamp: float = 1e-5,
):
super().__init__()
n_fft = win_length if n_fft is None else n_fft
mel_basis = librosa.filters.mel(
sr=sampling_rate,
n_fft=n_fft,
n_mels=n_mel_channels,
fmin=mel_fmin,
fmax=mel_fmax,
htk=True,
)
mel_basis = torch.from_numpy(mel_basis).float()
self.register_buffer("mel_basis", mel_basis, persistent=False)
self.n_fft = n_fft
self.hop_length = hop_length
self.win_length = win_length
self.sampling_rate = sampling_rate
self.n_mel_channels = n_mel_channels
self.clamp = clamp
self.keyshift = 0
self.speed = 1
self.factor = 2 ** (self.keyshift / 12)
self.n_fft_new = int(np.round(self.n_fft * self.factor))
self.win_length_new = int(np.round(self.win_length * self.factor))
self.hop_length_new = int(np.round(self.hop_length * self.speed))
if USING_TORCH_STFT:
self.stft = TorchSTFT(
filter_length=self.n_fft_new,
hop_length=self.hop_length_new,
win_length=self.win_length_new,
window="hann",
)
else:
self.stft = STFT(
filter_length=self.n_fft_new,
hop_length=self.hop_length_new,
win_length=self.win_length_new,
window="hann",
)
def forward(self, audio: torch.Tensor):
magnitude = self.stft(audio)
mel_output = torch.matmul(self.mel_basis, magnitude)
log_mel_spec = torch.log(torch.clamp(mel_output, min=self.clamp))
return log_mel_spec