Spaces:
Running
Running
Update src/vc_infer_pipeline.py
Browse files- src/vc_infer_pipeline.py +26 -15
src/vc_infer_pipeline.py
CHANGED
|
@@ -1,14 +1,12 @@
|
|
| 1 |
-
from functools import lru_cache
|
| 2 |
import numpy as np, parselmouth, torch, pdb, sys, os
|
| 3 |
from time import time as ttime
|
| 4 |
import torch.nn.functional as F
|
| 5 |
import torchcrepe
|
| 6 |
-
from scipy import signal
|
| 7 |
from torch import Tensor
|
| 8 |
-
import
|
| 9 |
-
import
|
| 10 |
-
import
|
| 11 |
-
import
|
| 12 |
|
| 13 |
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
| 14 |
now_dir = os.path.join(BASE_DIR, 'src')
|
|
@@ -37,14 +35,23 @@ def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
|
|
| 37 |
|
| 38 |
|
| 39 |
def change_rms(data1, sr1, data2, sr2, rate):
|
| 40 |
-
rms1 = librosa.feature.rms(
|
|
|
|
|
|
|
| 41 |
rms2 = librosa.feature.rms(y=data2, frame_length=sr2 // 2 * 2, hop_length=sr2 // 2)
|
| 42 |
rms1 = torch.from_numpy(rms1)
|
| 43 |
-
rms1 = F.interpolate(
|
|
|
|
|
|
|
| 44 |
rms2 = torch.from_numpy(rms2)
|
| 45 |
-
rms2 = F.interpolate(
|
|
|
|
|
|
|
| 46 |
rms2 = torch.max(rms2, torch.zeros_like(rms2) + 1e-6)
|
| 47 |
-
data2 *= (
|
|
|
|
|
|
|
|
|
|
| 48 |
return data2
|
| 49 |
|
| 50 |
|
|
@@ -454,12 +461,16 @@ class VC(object):
|
|
| 454 |
crepe_hop_length,
|
| 455 |
f0_file=None,
|
| 456 |
):
|
| 457 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
| 458 |
try:
|
| 459 |
index = faiss.read_index(file_index)
|
| 460 |
big_npy = index.reconstruct_n(0, index.ntotal)
|
| 461 |
-
except
|
| 462 |
-
|
| 463 |
index = big_npy = None
|
| 464 |
else:
|
| 465 |
index = big_npy = None
|
|
@@ -494,8 +505,8 @@ class VC(object):
|
|
| 494 |
for line in lines:
|
| 495 |
inp_f0.append([float(i) for i in line.split(",")])
|
| 496 |
inp_f0 = np.array(inp_f0, dtype="float32")
|
| 497 |
-
except
|
| 498 |
-
|
| 499 |
sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
|
| 500 |
pitch, pitchf = None, None
|
| 501 |
if if_f0 == 1:
|
|
|
|
|
|
|
| 1 |
import numpy as np, parselmouth, torch, pdb, sys, os
|
| 2 |
from time import time as ttime
|
| 3 |
import torch.nn.functional as F
|
| 4 |
import torchcrepe
|
|
|
|
| 5 |
from torch import Tensor
|
| 6 |
+
import scipy.signal as signal
|
| 7 |
+
import pyworld, os, traceback, faiss, librosa, torchcrepe
|
| 8 |
+
from scipy import signal
|
| 9 |
+
from functools import lru_cache
|
| 10 |
|
| 11 |
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
| 12 |
now_dir = os.path.join(BASE_DIR, 'src')
|
|
|
|
| 35 |
|
| 36 |
|
| 37 |
def change_rms(data1, sr1, data2, sr2, rate):
    """Rescale ``data2`` in place using the RMS envelopes of both signals.

    Each sample of ``data2`` is multiplied by
    ``rms1 ** (1 - rate) * rms2 ** (rate - 1)``, where ``rms1`` / ``rms2``
    are the RMS envelopes of ``data1`` / ``data2`` stretched to the length
    of ``data2``.  Consequently ``rate == 1`` leaves ``data2`` unchanged and
    ``rate == 0`` multiplies it by ``rms1 / rms2``.

    Args:
        data1: Reference signal passed to ``librosa.feature.rms``.
        sr1: Value used to derive the RMS frame and hop sizes for ``data1``
            (presumably its sample rate in Hz -- confirm against callers).
        data2: Signal to rescale; modified in place.  Its first dimension
            sets the interpolation target length.
        sr2: Same as ``sr1`` but for ``data2``.
        rate: Exponent weight described above.

    Returns:
        The same ``data2`` object, after the in-place multiplication.
    """
    target_len = data2.shape[0]

    def _stretch(frames):
        # Add a leading axis so F.interpolate sees a 3-D input, resample the
        # last axis linearly to one value per output sample, then drop the
        # singleton axes again.
        # NOTE(review): assumes librosa returned a 2-D array (mono input).
        as_tensor = torch.from_numpy(frames)
        resized = F.interpolate(
            as_tensor.unsqueeze(0), size=target_len, mode="linear"
        )
        return resized.squeeze()

    # Frame length is sr rounded down to an even number; hop is half of sr.
    frames_ref = librosa.feature.rms(
        y=data1, frame_length=sr1 // 2 * 2, hop_length=sr1 // 2
    )
    frames_out = librosa.feature.rms(
        y=data2, frame_length=sr2 // 2 * 2, hop_length=sr2 // 2
    )

    env_ref = _stretch(frames_ref)
    env_out = _stretch(frames_out)

    # Floor the output envelope at 1e-6 so the negative power below cannot
    # divide by zero.
    floor = torch.zeros_like(env_out) + 1e-6
    env_out = torch.max(env_out, floor)

    ref_term = torch.pow(env_ref, torch.tensor(1 - rate))
    out_term = torch.pow(env_out, torch.tensor(rate - 1))
    gain = ref_term * out_term

    data2 *= gain.numpy()
    return data2
|
| 56 |
|
| 57 |
|
|
|
|
| 461 |
crepe_hop_length,
|
| 462 |
f0_file=None,
|
| 463 |
):
|
| 464 |
+
if (
|
| 465 |
+
file_index != ""
|
| 466 |
+
and os.path.exists(file_index) == True
|
| 467 |
+
and index_rate != 0
|
| 468 |
+
):
|
| 469 |
try:
|
| 470 |
index = faiss.read_index(file_index)
|
| 471 |
big_npy = index.reconstruct_n(0, index.ntotal)
|
| 472 |
+
except:
|
| 473 |
+
traceback.print_exc()
|
| 474 |
index = big_npy = None
|
| 475 |
else:
|
| 476 |
index = big_npy = None
|
|
|
|
| 505 |
for line in lines:
|
| 506 |
inp_f0.append([float(i) for i in line.split(",")])
|
| 507 |
inp_f0 = np.array(inp_f0, dtype="float32")
|
| 508 |
+
except:
|
| 509 |
+
traceback.print_exc()
|
| 510 |
sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
|
| 511 |
pitch, pitchf = None, None
|
| 512 |
if if_f0 == 1:
|