Update src/vc_infer_pipeline.py
src/vc_infer_pipeline.py  CHANGED  (+19 -4)
@@ -77,7 +77,9 @@ class VC(object):
 
     def get_optimal_torch_device(self, index: int = 0) -> torch.device:
         if torch.cuda.is_available():
-            return torch.device(
+            return torch.device(
+                f"cuda:{index % torch.cuda.device_count()}"
+            )
         elif torch.backends.mps.is_available():
             return torch.device("mps")
         return torch.device("cpu")
@@ -91,7 +93,9 @@ class VC(object):
         hop_length=160,
         model="full",
     ):
-        x = x.astype(
+        x = x.astype(
+            np.float32
+        )
         x /= np.quantile(np.abs(x), 0.999)
         torch_device = self.get_optimal_torch_device()
         audio = torch.from_numpy(x).to(torch_device, copy=True)
@@ -147,6 +151,12 @@ class VC(object):
         f0 = f0[0].cpu().numpy()
         return f0
 
+    def get_f0_pyin_computation(self, x, f0_min, f0_max):
+        y, sr = librosa.load("saudio/Sidney.wav", self.sr, mono=True)
+        f0, _, _ = librosa.pyin(y, sr=self.sr, fmin=f0_min, fmax=f0_max)
+        f0 = f0[1:]
+        return f0
+
     def get_f0_hybrid_computation(
         self,
         methods_str,
@@ -168,7 +178,10 @@ class VC(object):
         x /= np.quantile(np.abs(x), 0.999)
         for method in methods:
             f0 = None
-            if method == "
+            if method == "crepe":
+                f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max)
+                f0 = f0[1:]
+            elif method == "mangio-crepe":
                 f0 = self.get_f0_crepe_computation(
                     x, f0_min, f0_max, p_len, crepe_hop_length
                 )
@@ -234,7 +247,9 @@ class VC(object):
             )
             pad_size = (p_len - len(f0) + 1) // 2
             if pad_size > 0 or p_len - len(f0) - pad_size > 0:
-                f0 = np.pad(
+                f0 = np.pad(
+                    f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
+                )
 
         elif f0_method == "harvest":
             input_audio_path2wav[input_audio_path] = x.astype(np.double)