import os import gc import sys import torch import librosa import numpy as np import torch.nn.functional as F sys.path.append(os.getcwd()) from main.library import opencl def autotune_f0(note_dict, f0, f0_autotune_strength): autotuned_f0 = np.zeros_like(f0) for i, freq in enumerate(f0): autotuned_f0[i] = freq + (min(note_dict, key=lambda x: abs(x - freq)) - freq) * f0_autotune_strength return autotuned_f0 def change_rms(source_audio, source_rate, target_audio, target_rate, rate): rms2 = F.interpolate(torch.from_numpy(librosa.feature.rms(y=target_audio, frame_length=target_rate // 2 * 2, hop_length=target_rate // 2)).float().unsqueeze(0), size=target_audio.shape[0], mode="linear").squeeze() return (target_audio * (torch.pow(F.interpolate(torch.from_numpy(librosa.feature.rms(y=source_audio, frame_length=source_rate // 2 * 2, hop_length=source_rate // 2)).float().unsqueeze(0), size=target_audio.shape[0], mode="linear").squeeze(), 1 - rate) * torch.pow(torch.maximum(rms2, torch.zeros_like(rms2) + 1e-6), rate - 1)).numpy()) def clear_gpu_cache(): gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() elif torch.backends.mps.is_available(): torch.mps.empty_cache() elif opencl.is_available(): opencl.pytorch_ocl.empty_cache() def extract_median_f0(f0): f0 = np.where(f0 == 0, np.nan, f0) return float(np.median(np.interp(np.arange(len(f0)), np.where(~np.isnan(f0))[0], f0[~np.isnan(f0)]))) def proposal_f0_up_key(f0, target_f0 = 155.0, limit = 12): return max(-limit, min(limit, int(np.round(12 * np.log2(target_f0 / extract_median_f0(f0)))))) def get_onnx_argument(net_g, feats, p_len, sid, pitch, pitchf, energy, pitch_guidance, energy_use): inputs = { net_g.get_inputs()[0].name: feats.cpu().numpy().astype(np.float32), net_g.get_inputs()[1].name: p_len.cpu().numpy(), net_g.get_inputs()[2].name: np.array([sid.cpu().item()], dtype=np.int64), net_g.get_inputs()[3].name: np.random.randn(1, 192, p_len).astype(np.float32) } if energy_use: if pitch_guidance: inputs.update({ net_g.get_inputs()[4].name: pitch.cpu().numpy().astype(np.int64), net_g.get_inputs()[5].name: pitchf.cpu().numpy().astype(np.float32), net_g.get_inputs()[6].name: energy.cpu().numpy().astype(np.float32) }) else: inputs.update({ net_g.get_inputs()[4].name: energy.cpu().numpy().astype(np.float32) }) else: if pitch_guidance: inputs.update({ net_g.get_inputs()[4].name: pitch.cpu().numpy().astype(np.int64), net_g.get_inputs()[5].name: pitchf.cpu().numpy().astype(np.float32) }) return inputs