Spaces:
Runtime error
Runtime error
# From https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI | |
""" | |
Copyright: RVC-Project | |
License: MIT | |
""" | |
import gc | |
import os | |
import traceback | |
import ffmpeg | |
import numpy as np | |
import torch.cuda | |
import argparse | |
import torch | |
import io | |
from multiprocessing import cpu_count | |
from fairseq import checkpoint_utils | |
from modules.voice_conversion.rvc.hubert.hubert_manager import HuBERTManager | |
from modules.voice_conversion.rvc.vc_infer_pipeline import VC | |
from modules.voice_conversion.rvc.infer_pack.models import ( | |
SynthesizerTrnMs256NSFsid, | |
SynthesizerTrnMs256NSFsid_nono, | |
SynthesizerTrnMs768NSFsid, | |
SynthesizerTrnMs768NSFsid_nono, | |
) | |
# Cached HuBERT model; stays None until load_hubert() populates it.
hubert_model = None
# ST HACK: no weights-directory prefix is used, so this is the empty string.
weight_root = ""
def config_file_change_fp32():
    """Patch on-disk config/script files for full-precision operation.

    Replaces every ``true`` with ``false`` in the 32k/40k/48k JSON files
    under ``configs/`` and every ``3.7`` with ``3.0`` in
    ``trainset_preprocess_pipeline_print.py`` (paths are relative to the
    current working directory).  The first failure is printed and aborts the
    remaining rewrites; nothing is raised to the caller.
    """
    # (path, text to find, replacement) in the order the files are patched.
    rewrites = [
        (f"configs/{name}", "true", "false")
        for name in ("32k.json", "40k.json", "48k.json")
    ]
    rewrites.append(("trainset_preprocess_pipeline_print.py", "3.7", "3.0"))
    try:
        for target, needle, replacement in rewrites:
            with open(target, "r") as src:
                patched = src.read().replace(needle, replacement)
            with open(target, "w") as dst:
                dst.write(patched)
    except Exception as e:
        print(f'exception in config_file_change_fp32: {e}')
class Config:
    """Device and precision settings used for RVC inference.

    Attributes:
        device: torch device string; starts as ``"cuda:0"`` and is switched
            to ``"mps"`` or ``"cpu"`` when CUDA is unavailable.
        is_half: whether models are run in half precision.
        n_cpu: CPU count; ``0`` is replaced by ``cpu_count()``.
        gpu_name: CUDA device name, kept only for cards forced to full
            precision (reset to ``None`` otherwise).
        gpu_mem: CUDA device memory in GiB (rounded), or ``None`` without CUDA.
        x_pad, x_query, x_center, x_max: tuning values selected by
            ``device_config``; presumably chunking windows consumed by the
            VC pipeline -- confirm against ``VC``.
    """

    def __init__(self):
        self.device = "cuda:0"
        self.is_half = True
        self.n_cpu = 0
        self.gpu_name = None
        self.gpu_mem = None
        self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()

    def device_config(self) -> tuple:
        """Detect the compute device and pick matching pipeline parameters.

        Updates ``device``, ``is_half``, ``n_cpu``, ``gpu_name`` and
        ``gpu_mem`` as a side effect, and calls ``config_file_change_fp32()``
        whenever full precision is forced.

        Returns:
            ``(x_pad, x_query, x_center, x_max)``.
        """
        # Older torch builds have no ``torch.backends.mps``; treat that as
        # "MPS not available" instead of failing with AttributeError.
        mps_backend = getattr(torch.backends, "mps", None)

        if torch.cuda.is_available():
            i_device = int(self.device.split(":")[-1])
            self.gpu_name = torch.cuda.get_device_name(i_device)
            if (
                ("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
                or "P40" in self.gpu_name.upper()
                or "1060" in self.gpu_name
                or "1070" in self.gpu_name
                or "1080" in self.gpu_name
            ):
                print("Forcing full precision for 16/10 series cards.")
                self.is_half = False
                config_file_change_fp32()
            else:
                self.gpu_name = None
            # Total device memory in GiB; the +0.4 rounds e.g. 3.6+ up to 4.
            self.gpu_mem = int(
                torch.cuda.get_device_properties(i_device).total_memory
                / 1024
                / 1024
                / 1024
                + 0.4
            )
        elif mps_backend is not None and mps_backend.is_available():
            print("No compatible GPU found, using MPS for inference.")
            self.device = "mps"
            self.is_half = False
            config_file_change_fp32()
        else:
            print("No compatible GPU found, using CPU for inference.")
            self.device = "cpu"
            self.is_half = False
            config_file_change_fp32()

        if self.n_cpu == 0:
            self.n_cpu = cpu_count()

        if self.is_half:
            # Settings for 6 GB of VRAM.
            x_pad = 3
            x_query = 10
            x_center = 60
            x_max = 65
        else:
            # Settings for 5 GB of VRAM.
            x_pad = 1
            x_query = 6
            x_center = 38
            x_max = 41

        # Small cards (<= 4 GiB) get the most conservative settings.
        if self.gpu_mem is not None and self.gpu_mem <= 4:
            x_pad = 1
            x_query = 5
            x_center = 30
            x_max = 32

        return x_pad, x_query, x_center, x_max


# Module-wide configuration instance; device detection runs at import time.
config = Config()
def load_hubert():
    """Load the HuBERT model into the module-level ``hubert_model`` cache.

    Does nothing when a model is already cached.  Otherwise the checkpoint
    path returned by ``HuBERTManager.make_sure_hubert_rvc_installed()`` is
    loaded through fairseq, moved to ``config.device``, cast to half or full
    precision according to ``config.is_half`` and put in eval mode.
    """
    global hubert_model
    if hubert_model:
        return

    checkpoint_path = HuBERTManager.make_sure_hubert_rvc_installed()
    ensemble, _, _ = checkpoint_utils.load_model_ensemble_and_task(
        [checkpoint_path],
        suffix="",
    )
    hubert_model = ensemble[0]
    hubert_model = hubert_model.to(config.device)
    hubert_model = hubert_model.half() if config.is_half else hubert_model.float()
    hubert_model.eval()
def load_audio(audio_source, sr):
    """Decode audio to a mono float32 waveform at ``sr`` Hz using ffmpeg.

    Args:
        audio_source: either a file path (surrounding spaces, double quotes
            and newlines are stripped) or an ``io.BytesIO`` holding encoded
            audio, which is rewound and piped to ffmpeg's stdin.
        sr: target sample rate, passed to ffmpeg as ``ar``.

    Returns:
        A 1-D ``numpy.float32`` array.  ``flatten()`` returns a copy, so the
        result is writable even though ``np.frombuffer`` is read-only.

    Raises:
        RuntimeError: for an unsupported ``audio_source`` type or any
            decoding failure.  The original exception is chained as
            ``__cause__`` and, when it carries captured ``stderr`` (as
            ffmpeg errors do), that output is appended to the message.
    """
    try:
        if isinstance(audio_source, str):  # If it's a file path
            cleaned_path = audio_source.strip(" ").strip('"').strip("\n").strip('"')
            stream = ffmpeg.input(cleaned_path, threads=0)
            stdin_payload = None
        elif isinstance(audio_source, io.BytesIO):  # If it's a BytesIO object
            audio_source.seek(0)
            stream = ffmpeg.input("pipe:0", threads=0)
            stdin_payload = audio_source.read()
        else:
            raise ValueError("Invalid audio source")

        # ``input=None`` is run()'s default, i.e. stdin is not piped.
        out, _ = stream.output(
            "-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr
        ).run(
            input=stdin_payload,
            cmd=["ffmpeg", "-nostdin"],
            capture_stdout=True,
            capture_stderr=True,
        )
    except Exception as e:
        detail = str(e)
        # stderr is captured above; without surfacing it here the message
        # would only say that ffmpeg failed, not why.
        captured = getattr(e, "stderr", None)
        if captured:
            if isinstance(captured, bytes):
                captured = captured.decode("utf-8", errors="replace")
            detail = f"{detail}\n{captured.strip()}"
        raise RuntimeError(f"Failed to load audio: {detail}") from e
    return np.frombuffer(out, np.float32).flatten()
# Module-level RVC state managed by unload_rvc() / load_rvc():
# the active VC pipeline and the model name it was loaded for.
vc = rvc_model_name = None
# 'maximum' value reported by the most recent successful get_vc() call.
maximum = 0
def unload_rvc():
    """Drop the currently loaded RVC model and release its memory.

    Resets the module-level ``vc`` pipeline and ``rvc_model_name``, and also
    the synthesizer (``net_g``) and checkpoint (``cpt``) that ``get_vc``
    stores as globals.  Without clearing those two the model weights stay
    referenced and nothing substantial is freed.  Afterwards a garbage
    collection is forced and, when CUDA is available, its cache is emptied.
    """
    global vc, rvc_model_name, net_g, cpt
    rvc_model_name = None
    vc = None
    net_g = None
    cpt = None
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
def load_rvc(model):
    """Make sure ``model`` is the loaded RVC voice and return its ``maximum``.

    When ``model`` differs from the currently loaded one, the old model is
    unloaded and ``get_vc(model)`` loads the new one.  The model name is
    recorded only after the load succeeded, so a failed load is retried on
    the next call instead of being mistaken for an already-loaded model.

    Args:
        model: value forwarded to ``get_vc`` (the checkpoint path).

    Returns:
        The ``'maximum'`` entry of the dict returned by ``get_vc``; for an
        already-loaded model, the value cached from the earlier load.

    Raises:
        KeyError: if ``get_vc`` returns no ``'maximum'`` (e.g. empty model).
        Exception: whatever ``get_vc`` raises while loading.
    """
    global rvc_model_name, maximum
    if model != rvc_model_name:
        unload_rvc()
        # Load rvc
        maximum = get_vc(model)['maximum']
        rvc_model_name = model  # correct for ST
    return maximum
def vc_single(
    sid,
    input_audio_path,
    f0_up_key,
    f0_file,
    f0_method,
    file_index,
    file_index2,
    # file_big_npy,
    index_rate,
    filter_radius,
    resample_sr,
    rms_mix_rate,
    protect,
    crepe_hop_length=128
):  # spk_item, input_audio0, vc_transform0,f0_file,f0method0
    """Convert one audio input with the currently loaded RVC model.

    Relies on the module-level state set up by ``get_vc`` (``vc``, ``net_g``,
    ``cpt``, ``tgt_sr``, ``version``) and loads HuBERT on first use.

    Args:
        sid: speaker id forwarded to ``vc.pipeline``.
        input_audio_path: audio source accepted by ``load_audio``; ``None``
            short-circuits with an error message.
        f0_up_key: pitch shift, converted with ``int()``.
        f0_file, f0_method, index_rate, filter_radius, rms_mix_rate, protect,
            crepe_hop_length: forwarded unchanged to ``vc.pipeline``.
        file_index: index path typed by the user; quotes/whitespace are
            stripped and ``"trained"`` is replaced with ``"added"``.
        file_index2: index path used when ``file_index`` is empty.
        resample_sr: output sample rate; reported instead of the model rate
            when it is at least 16000 and differs from the model rate.

    Returns:
        ``("You need to upload an audio", None)`` when no input was given;
        ``(status_message, (sample_rate, audio))`` on success;
        ``(traceback_text, (None, None))`` when conversion fails.

    Raises:
        ValueError / TypeError: if ``f0_up_key`` cannot be converted to int
            (this happens before the guarded section).
    """
    if input_audio_path is None:
        return "You need to upload an audio", None
    f0_up_key = int(f0_up_key)
    try:
        audio = load_audio(input_audio_path, 16000)
        # Scale down only when the peak would exceed 0.95 full scale.
        audio_max = np.abs(audio).max() / 0.95
        if audio_max > 1:
            audio /= audio_max
        times = [0, 0, 0]  # filled in by vc.pipeline: npy, f0, infer timings
        if hubert_model is None:
            load_hubert()
        if_f0 = cpt.get("f0", 1)
        if file_index != "":
            # Guard against common user mistakes: strip stray quotes and
            # whitespace and point at the "added" index automatically.
            file_index = (
                file_index.strip(" ")
                .strip('"')
                .strip("\n")
                .strip('"')
                .strip(" ")
                .replace("trained", "added")
            )
        else:
            file_index = file_index2
        audio_opt = vc.pipeline(
            hubert_model,
            net_g,
            sid,
            audio,
            input_audio_path,
            times,
            f0_up_key,
            f0_method,
            file_index,
            # file_big_npy,
            index_rate,
            if_f0,
            filter_radius,
            tgt_sr,
            resample_sr,
            rms_mix_rate,
            version,
            protect,
            f0_file=f0_file,
            crepe_hop_length=crepe_hop_length
        )
        # Keep the output rate local: writing it back to the global tgt_sr
        # would change the model's sample rate for every later call.
        output_sr = tgt_sr
        if resample_sr >= 16000 and tgt_sr != resample_sr:
            output_sr = resample_sr
        index_info = (
            "Using index:%s." % file_index
            if os.path.exists(file_index)
            else "Index not used."
        )
        return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
            index_info,
            times[0],
            times[1],
            times[2],
        ), (output_sr, audio_opt)
    except Exception:
        info = traceback.format_exc()
        print(info)
        return info, (None, None)
# Only one voice (timbre) can be loaded globally per tab.
def _build_synthesizer(checkpoint):
    """Instantiate the synthesizer class matching a checkpoint's version/f0 flag."""
    ckpt_version = checkpoint.get("version", "v1")
    if ckpt_version == "v1":
        f0_cls, plain_cls = SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
    elif ckpt_version == "v2":
        f0_cls, plain_cls = SynthesizerTrnMs768NSFsid, SynthesizerTrnMs768NSFsid_nono
    else:
        raise ValueError("Unsupported RVC checkpoint version: %r" % (ckpt_version,))
    if checkpoint.get("f0", 1) == 1:
        return f0_cls(*checkpoint["config"], is_half=config.is_half)
    return plain_cls(*checkpoint["config"])


def get_vc(sid):
    """Load the RVC voice checkpoint ``sid``, or unload everything when empty.

    Module-level state written: ``n_spk``, ``tgt_sr``, ``net_g``, ``vc``,
    ``cpt``, ``version`` and (on unload) ``hubert_model``.

    Args:
        sid: checkpoint path to load.  ``""`` or ``[]`` requests an unload.

    Returns:
        ``{"visible": False, "__type__": "update"}`` after an unload request,
        otherwise ``{"visible": True, "maximum": n_spk, "__type__": "update"}``
        (this looks like a Gradio-style update payload).

    Raises:
        ValueError: if the checkpoint declares a version other than v1/v2.
        Exception: anything raised by ``torch.load`` or while building the
            model.
    """
    global n_spk, tgt_sr, net_g, vc, cpt, version, hubert_model
    if sid == "" or sid == []:
        # Because of polling, only clean up when a model was actually loaded
        # before (i.e. this is a switch from "model" to "no model").
        if hubert_model is not None:
            print("clean_empty_cache")
            # ``cpt`` may never have been assigned if no voice was loaded.
            stale_cpt = globals().get("cpt")
            # Rebinding to None releases the objects without requiring that
            # every one of these globals already exists (``del`` would).
            hubert_model = net_g = n_spk = vc = tgt_sr = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            if stale_cpt is not None:
                # Upstream workaround: memory is not fully released unless
                # the network is rebuilt once more and then dropped.
                version = stale_cpt.get("version", "v1")
                try:
                    scratch = _build_synthesizer(stale_cpt)
                except ValueError:
                    scratch = None
                del scratch, stale_cpt
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
            cpt = None
        return {"visible": False, "__type__": "update"}

    # person = "%s/%s" % (weight_root, sid)  # ST HACK
    person = sid
    print("loading %s" % person)
    # SECURITY: torch.load unpickles the file; only load trusted checkpoints.
    cpt = torch.load(person, map_location="cpu")
    tgt_sr = cpt["config"][-1]
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
    version = cpt.get("version", "v1")
    # Raises for unknown versions instead of silently reusing a stale net_g.
    net_g = _build_synthesizer(cpt)
    del net_g.enc_q
    print(net_g.load_state_dict(cpt["weight"], strict=False))
    net_g.eval().to(config.device)
    if config.is_half:
        net_g = net_g.half()
    else:
        net_g = net_g.float()
    vc = VC(tgt_sr, config)
    n_spk = cpt["config"][-3]
    return {"visible": True, "maximum": n_spk, "__type__": "update"}
def change_info(path, info, name):
    """Store ``info`` in a checkpoint and save a copy under ``weights/``.

    Args:
        path: checkpoint file to read with ``torch.load``.
        info: value written to the checkpoint's ``"info"`` key.
        name: output file name inside ``weights/``; an empty string means
            "use the base name of ``path``".

    Returns:
        ``"Success."`` on success, otherwise the formatted traceback of the
        failure (no exception is raised to the caller).
    """
    try:
        # SECURITY: torch.load unpickles the file; only use trusted checkpoints.
        ckpt = torch.load(path, map_location="cpu")
        ckpt["info"] = info
        if name == "":
            name = os.path.basename(path)
        # The output directory is not guaranteed to exist on a fresh tree.
        os.makedirs("weights", exist_ok=True)
        torch.save(ckpt, "weights/%s" % name)
        return "Success."
    except Exception:
        return traceback.format_exc()
def change_info_(ckpt_path):
    """Read sample rate, f0 flag and version from the ``train.log`` beside a checkpoint.

    The first line of ``train.log`` (in the same directory as ``ckpt_path``)
    is expected to end with a tab-separated Python dict literal containing
    ``"sample_rate"`` and ``"if_f0"`` and optionally ``"version"``.

    Args:
        ckpt_path: path of the checkpoint whose directory is searched.

    Returns:
        ``(sample_rate, str(if_f0), version)`` where ``version`` is ``"v2"``
        only when the log says so and ``"v1"`` otherwise; ``None`` when the
        log is missing or cannot be parsed (the traceback is printed).
    """
    # Build the sibling path from the directory; replacing the base name as
    # a substring would also rewrite matching parts of the directory name.
    log_path = os.path.join(os.path.dirname(ckpt_path), "train.log")
    if not os.path.exists(log_path):
        return None
    try:
        with open(log_path, "r") as f:
            first_line = f.read().strip("\n").split("\n")[0]
        # SECURITY: eval() executes whatever the log contains; only point
        # this at logs you produced yourself.
        # TODO(review): switch to ast.literal_eval once the log format is
        # confirmed to be a pure literal.
        info = eval(first_line.split("\t")[-1])
        sr, f0 = info["sample_rate"], info["if_f0"]
        version = "v2" if ("version" in info and info["version"] == "v2") else "v1"
        return sr, str(f0), version
    except Exception:
        traceback.print_exc()
        return None