Upload 3 files
Browse files- README.md +15 -0
- config.yaml +70 -0
- volodarsky.spk.npy +3 -0
README.md
CHANGED
@@ -1,3 +1,18 @@
|
|
1 |
---
|
2 |
license: apache-2.0
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
license: apache-2.0
|
3 |
---
|
4 |
+
|
5 |
+
# volodarsky-so-vits-svc-5.0
|
6 |
+
|
7 |
+
Voice-to-voice модель для [PlayVoice/so-vits-svc-5.0](https://github.com/PlayVoice/so-vits-svc-5.0), преобразующая исходный голос в голос пиратского переводчика Леонида Володарского.
|
8 |
+
|
9 |
+
Датасет: [cwiz/leonid-volodarsky-tts](https://huggingface.co/datasets/cwiz/leonid-volodarsky-tts)
|
10 |
+
|
11 |
+
## Использование
|
12 |
+
|
13 |
+
1. Установите [PlayVoice/so-vits-svc-5.0](https://github.com/PlayVoice/so-vits-svc-5.0)
|
14 |
+
2. Скачайте config.yaml и volodarsky.spk.npy с Hugging Face
|
15 |
+
3. Преобразование голоса:
|
16 |
+
```bash
|
17 |
+
python svc_inference.py --config config.yaml --model sovits5.0.pth --spk volodarsky.spk.npy --wave input.wav --shift 0
|
18 |
+
```
|
config.yaml
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
train:
|
2 |
+
model: "sovits"
|
3 |
+
seed: 1234
|
4 |
+
epochs: 10000
|
5 |
+
learning_rate: 5e-5
|
6 |
+
betas: [0.8, 0.99]
|
7 |
+
lr_decay: 0.999875
|
8 |
+
eps: 1e-9
|
9 |
+
batch_size: 11
|
10 |
+
c_stft: 9
|
11 |
+
c_mel: 1.
|
12 |
+
c_kl: 0.2
|
13 |
+
port: 8001
|
14 |
+
pretrain: "./vits_pretrain/sovits5.0.pretrain.pth"
|
15 |
+
#############################
|
16 |
+
data:
|
17 |
+
training_files: "files/train.txt"
|
18 |
+
validation_files: "files/valid.txt"
|
19 |
+
segment_size: 8000 # WARNING: based on hop_length
|
20 |
+
max_wav_value: 32768.0
|
21 |
+
sampling_rate: 32000
|
22 |
+
filter_length: 1024
|
23 |
+
hop_length: 320
|
24 |
+
win_length: 1024
|
25 |
+
mel_channels: 100
|
26 |
+
mel_fmin: 50.0
|
27 |
+
mel_fmax: 16000.0
|
28 |
+
#############################
|
29 |
+
vits:
|
30 |
+
ppg_dim: 1280
|
31 |
+
vec_dim: 256
|
32 |
+
spk_dim: 256
|
33 |
+
gin_channels: 256
|
34 |
+
inter_channels: 192
|
35 |
+
hidden_channels: 192
|
36 |
+
filter_channels: 640
|
37 |
+
#############################
|
38 |
+
gen:
|
39 |
+
upsample_input: 192
|
40 |
+
upsample_rates: [5, 4, 4, 2, 2]
|
41 |
+
upsample_kernel_sizes: [15, 8, 8, 4, 4]
|
42 |
+
upsample_initial_channel: 320
|
43 |
+
resblock_kernel_sizes: [3, 7, 11]
|
44 |
+
resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
|
45 |
+
#############################
|
46 |
+
mpd:
|
47 |
+
periods: [2, 3, 5, 7, 11]
|
48 |
+
kernel_size: 5
|
49 |
+
stride: 3
|
50 |
+
use_spectral_norm: False
|
51 |
+
lReLU_slope: 0.2
|
52 |
+
#############################
|
53 |
+
mrd:
|
54 |
+
resolutions: "[(1024, 120, 600), (2048, 240, 1200), (4096, 480, 2400), (512, 50, 240)]" # (filter_length, hop_length, win_length)
|
55 |
+
use_spectral_norm: False
|
56 |
+
lReLU_slope: 0.2
|
57 |
+
#############################
|
58 |
+
log:
|
59 |
+
info_interval: 100
|
60 |
+
eval_interval: 1
|
61 |
+
save_interval: 5
|
62 |
+
num_audio: 6
|
63 |
+
pth_dir: "chkpt"
|
64 |
+
log_dir: "logs"
|
65 |
+
keep_ckpts: 0
|
66 |
+
#############################
|
67 |
+
dist_config:
|
68 |
+
dist_backend: "nccl"
|
69 |
+
dist_url: "tcp://localhost:54321"
|
70 |
+
world_size: 1
|
volodarsky.spk.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4711a5447f8f083a5e7b46272c858ef76d32d23bd0ed1d07b056b128dbbb7df5
|
3 |
+
size 1152
|