RVC-GUI / main /app /core /f0_extract.py
AnhP's picture
Upload 170 files
1e4a2ab verified
raw
history blame
1.77 kB
import os
import sys
import librosa
import numpy as np
import matplotlib.pyplot as plt
sys.path.append(os.getcwd())
from main.library.utils import check_assets
from main.app.core.ui import gr_info, gr_warning
from main.library.predictors.Generator import Generator
from main.app.variables import config, translations, configs
def f0_extract(audio, f0_method, f0_onnx):
    """Extract the pitch contour of an audio file and save it as a plot and a text file.

    The pitch track returned by the project's ``Generator`` is converted to
    cents relative to MIDI note 0. Frames whose pitch is exactly 0 are
    replaced by NaN first (presumably these are unvoiced frames), so they
    appear as gaps in the plot and as ``nan`` in the text file.

    Outputs are written to ``<configs["f0_path"]>/<audio file stem>/`` as
    ``f0.png`` (the plot) and ``f0.txt`` (one ``time_in_seconds,cents`` line
    per frame).

    Args:
        audio: Path to the input audio file.
        f0_method: Pitch-extraction method name; forwarded to ``check_assets``
            and ``Generator.calculator`` and used as the plot title.
        f0_onnx: Forwarded to ``check_assets`` and to ``Generator`` as both
            ``f0_onnx_mode`` and ``del_onnx_model``.

    Returns:
        ``[txt_path, image_path]`` on success, or ``[None, None]`` when
        ``audio`` is empty, does not exist, or is a directory.
    """
    if not audio or not os.path.exists(audio) or os.path.isdir(audio):
        gr_warning(translations["input_not_valid"])
        return [None] * 2

    check_assets(f0_method, None, f0_onnx, None)

    # Frame step in samples. The same value is handed to Generator and used
    # for the timestamps below, so it is named once instead of repeated.
    # NOTE(review): assumes Generator's second positional argument is the hop
    # length in samples - confirm against main.library.predictors.Generator.
    hop_length = 160

    f0_path = os.path.join(configs["f0_path"], os.path.splitext(os.path.basename(audio))[0])
    image_path = os.path.join(f0_path, "f0.png")
    txt_path = os.path.join(f0_path, "f0.txt")

    gr_info(translations["start_extract"])
    # exist_ok=True already covers the "directory is there" case; a separate
    # os.path.exists() check adds nothing.
    os.makedirs(f0_path, exist_ok=True)

    # sr=None keeps the file's native sample rate.
    y, sr = librosa.load(audio, sr=None)
    f0_generator = Generator(
        sr,
        hop_length,
        50,
        1600,
        is_half=config.is_half,
        device=config.device,
        f0_onnx_mode=f0_onnx,
        del_onnx_model=f0_onnx,
    )
    _, pitchf = f0_generator.calculator(config.x_pad, f0_method, y, 0, None, 3, False, 0, None, False)

    pitch_hz = np.array(pitchf, dtype=np.float32)
    # Zero would become -inf under log2; NaN leaves a gap in the plot instead.
    pitch_hz[pitch_hz == 0] = np.nan
    f0 = 1200 * np.log2(pitch_hz / librosa.midi_to_hz(0))

    # Use an explicit figure object and always close it, so a failing
    # savefig() cannot leave an open figure behind in a long-running process.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        ax.plot(f0)
        ax.set_title(f0_method)
        ax.set_xlabel(translations["time_frames"])
        ax.set_ylabel(translations["Frequency"])
        fig.savefig(image_path)
    finally:
        plt.close(fig)

    # Frame i starts at sample i * hop_length, i.e. at i * hop_length / sr
    # seconds. The previous expression (i * sr / hop_length) inverted the
    # ratio and did not yield a time value.
    with open(txt_path, "w", encoding="utf-8") as f:
        f.writelines(
            f"{i * hop_length / sr},{f0_value}\n" for i, f0_value in enumerate(f0)
        )

    gr_info(translations["extract_done"])
    return [txt_path, image_path]