cwiz committed on
Commit
f2c93fa
·
1 Parent(s): 7c1d13f

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +15 -0
  2. config.yaml +70 -0
  3. volodarsky.spk.npy +3 -0
README.md CHANGED
@@ -1,3 +1,18 @@
1
  ---
2
  license: apache-2.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
  ---
4
+
5
+ # volodarsky-so-vits-svc-5.0
6
+
7
+ Voice-2-Voice модель для преобразования голоса в голос пиратского переводчика Леонида Володарского для [PlayVoice/so-vits-svc-5.0](https://github.com/PlayVoice/so-vits-svc-5.0).
8
+
9
+ Датасет: [cwiz/leonid-volodarsky-tts](https://huggingface.co/datasets/cwiz/leonid-volodarsky-tts)
10
+
11
+ ## Использование
12
+
13
+ 1. Установите [PlayVoice/so-vits-svc-5.0](https://github.com/PlayVoice/so-vits-svc-5.0)
14
+ 2. Скачайте config.yaml и volodarsky.spk.npy с Hugging Face
15
+ 3. Маскинг голоса:
16
+ ```bash
17
+ python svc_inference.py --config config.yaml --model sovits5.0.pth --spk volodarsky.spk.npy --wave input.wav --shift 0
18
+ ```
config.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ train:
2
+ model: "sovits"
3
+ seed: 1234
4
+ epochs: 10000
5
+ learning_rate: 5e-5
6
+ betas: [0.8, 0.99]
7
+ lr_decay: 0.999875
8
+ eps: 1e-9
9
+ batch_size: 11
10
+ c_stft: 9
11
+ c_mel: 1.
12
+ c_kl: 0.2
13
+ port: 8001
14
+ pretrain: "./vits_pretrain/sovits5.0.pretrain.pth"
15
+ #############################
16
+ data:
17
+ training_files: "files/train.txt"
18
+ validation_files: "files/valid.txt"
19
+ segment_size: 8000 # WARNING: based on hop_length
20
+ max_wav_value: 32768.0
21
+ sampling_rate: 32000
22
+ filter_length: 1024
23
+ hop_length: 320
24
+ win_length: 1024
25
+ mel_channels: 100
26
+ mel_fmin: 50.0
27
+ mel_fmax: 16000.0
28
+ #############################
29
+ vits:
30
+ ppg_dim: 1280
31
+ vec_dim: 256
32
+ spk_dim: 256
33
+ gin_channels: 256
34
+ inter_channels: 192
35
+ hidden_channels: 192
36
+ filter_channels: 640
37
+ #############################
38
+ gen:
39
+ upsample_input: 192
40
+ upsample_rates: [5, 4, 4, 2, 2]
41
+ upsample_kernel_sizes: [15, 8, 8, 4, 4]
42
+ upsample_initial_channel: 320
43
+ resblock_kernel_sizes: [3, 7, 11]
44
+ resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
45
+ #############################
46
+ mpd:
47
+ periods: [2, 3, 5, 7, 11]
48
+ kernel_size: 5
49
+ stride: 3
50
+ use_spectral_norm: False
51
+ lReLU_slope: 0.2
52
+ #############################
53
+ mrd:
54
+ resolutions: "[(1024, 120, 600), (2048, 240, 1200), (4096, 480, 2400), (512, 50, 240)]" # (filter_length, hop_length, win_length)
55
+ use_spectral_norm: False
56
+ lReLU_slope: 0.2
57
+ #############################
58
+ log:
59
+ info_interval: 100
60
+ eval_interval: 1
61
+ save_interval: 5
62
+ num_audio: 6
63
+ pth_dir: "chkpt"
64
+ log_dir: "logs"
65
+ keep_ckpts: 0
66
+ #############################
67
+ dist_config:
68
+ dist_backend: "nccl"
69
+ dist_url: "tcp://localhost:54321"
70
+ world_size: 1
volodarsky.spk.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4711a5447f8f083a5e7b46272c858ef76d32d23bd0ed1d07b056b128dbbb7df5
3
+ size 1152