Spaces:
Runtime error
Runtime error
Create infer_rvc.py
Browse files- infer_rvc.py +41 -0
infer_rvc.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import librosa
|
3 |
+
import numpy as np
|
4 |
+
import soundfile as sf
|
5 |
+
import argparse
|
6 |
+
import os
|
7 |
+
|
8 |
+
def load_rvc_model(model_path):
    """Load a serialized RVC voice-conversion model and switch it to eval mode.

    Args:
        model_path: Filesystem path to the serialized model file.

    Returns:
        The loaded ``torch.nn.Module`` in evaluation mode, mapped to CUDA
        when available and to the CPU otherwise.

    Raises:
        FileNotFoundError: If ``model_path`` is not an existing regular file.
        TypeError: If the file does not contain a whole ``torch.nn.Module``
            (for example a state-dict / checkpoint dictionary), which this
            loader cannot run directly.
    """
    # isfile (not exists) so that a directory path is rejected up front
    # with the same, clear error instead of failing inside torch.load.
    if not os.path.isfile(model_path):
        raise FileNotFoundError(f"Model not found: {model_path}")

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # SECURITY: torch.load may unpickle arbitrary Python objects; only load
    # model files obtained from a trusted source.
    # NOTE(review): newer PyTorch releases may default to weights-only
    # loading, which rejects fully pickled modules — confirm against the
    # installed version.
    model = torch.load(model_path, map_location=device)

    # A checkpoint saved as a dict has no .eval(); fail with an actionable
    # message rather than an opaque AttributeError.
    if not isinstance(model, torch.nn.Module):
        raise TypeError(
            f"Expected a torch.nn.Module in {model_path}, "
            f"got {type(model).__name__}; a state-dict/checkpoint must be "
            "loaded into its model class before inference"
        )

    model.eval()
    return model
|
15 |
+
|
16 |
+
def convert_voice(input_audio, output_audio, model_path, index_path, pitch_shift=0):
    """Run an audio file through the RVC model and write the result to disk.

    Args:
        input_audio: Path of the audio file to convert.
        output_audio: Path the converted audio is written to.
        model_path: Path of the serialized model, passed to
            ``load_rvc_model``.
        index_path: Path of the RVC index file. Accepted for interface
            compatibility; not used by this implementation.
        pitch_shift: Pitch shift value. Accepted for interface
            compatibility; not used by this implementation.

    Raises:
        FileNotFoundError: Propagated from ``load_rvc_model`` when the model
            file is missing.
    """
    model = load_rvc_model(model_path)

    # Load the input resampled to 44.1 kHz; the output is written at the
    # same rate.
    samples, sr = librosa.load(input_audio, sr=44100)

    # The model may have been mapped to CUDA by load_rvc_model, so the input
    # must live on the same device or the forward pass fails with a device
    # mismatch. Fall back to CPU for parameter-less or non-Module callables.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = torch.device("cpu")

    # Add a leading batch dimension before feeding the model.
    batch = torch.tensor(samples, dtype=torch.float32).unsqueeze(0).to(device)

    with torch.no_grad():
        converted_audio = model(batch)  # Convert using RVC model

    # Tensor.numpy() only works on CPU tensors, so bring the result back
    # before handing it to soundfile.
    waveform = converted_audio.detach().cpu().numpy().squeeze()
    sf.write(output_audio, waveform, sr)
    print(f"Converted voice saved to {output_audio}")
|
31 |
+
|
32 |
+
if __name__ == "__main__":
    # Command-line entry point: collect the file paths and the pitch-shift
    # value, then hand them to convert_voice.
    cli = argparse.ArgumentParser()

    # Every path option is mandatory; register them from one table.
    required_options = (
        ("--input", "Input TTS audio file"),
        ("--output", "Output converted audio file"),
        ("--model", "Path to RVC model (e.g., zeldabotw.pth)"),
        ("--index", "Path to RVC index file (e.g., zeldabotw.index)"),
    )
    for flag, description in required_options:
        cli.add_argument(flag, required=True, help=description)

    cli.add_argument("--pitch_shift", type=int, default=0, help="Pitch shift value")

    opts = cli.parse_args()
    convert_voice(
        opts.input,
        opts.output,
        opts.model,
        opts.index,
        opts.pitch_shift,
    )
|