---
# Model training configuration: one mapping per subsystem.
# NOTE(review): remarks on individual keys below are inferred from the key
# names and values in this file only; confirm against the code that loads it.

# Training-run settings.
train:
  model: "sovits"
  seed: 1234
  epochs: 10000
  # Exponent floats are written with an explicit mantissa dot so that
  # YAML 1.1 loaders resolve them as floats instead of plain strings.
  learning_rate: 5.0e-5
  betas: [0.8, 0.99]
  lr_decay: 0.999875
  eps: 1.0e-9
  batch_size: 11
  c_stft: 9
  c_mel: 1.0
  c_kl: 0.2
  port: 8001
  pretrain: "./vits_pretrain/sovits5.0.pretrain.pth"

# Dataset file lists and audio / spectrogram parameters.
data:
  training_files: "files/train.txt"
  validation_files: "files/valid.txt"
  segment_size: 8000
  max_wav_value: 32768.0
  sampling_rate: 32000
  filter_length: 1024
  hop_length: 320
  win_length: 1024
  mel_channels: 100
  mel_fmin: 50.0
  # NOTE(review): equals sampling_rate / 2; presumably has to be revisited
  # whenever sampling_rate changes -- confirm.
  mel_fmax: 16000.0

# Feature and channel dimensions.
vits:
  ppg_dim: 1280
  vec_dim: 256
  spk_dim: 256
  gin_channels: 256
  inter_channels: 192
  hidden_channels: 192
  filter_channels: 640

# Upsampling / residual-block layout.
gen:
  upsample_input: 192
  # NOTE(review): the product of these rates (5 * 4 * 4 * 2 * 2 = 320) is the
  # same value as data.hop_length; presumably the two must stay in sync --
  # confirm.
  upsample_rates: [5, 4, 4, 2, 2]
  upsample_kernel_sizes: [15, 8, 8, 4, 4]
  upsample_initial_channel: 320
  resblock_kernel_sizes: [3, 7, 11]
  resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]

mpd:
  periods: [2, 3, 5, 7, 11]
  kernel_size: 5
  stride: 3
  use_spectral_norm: false
  lReLU_slope: 0.2

mrd:
  # Deliberately a quoted string: the tuple syntax is not YAML, so the value
  # reaches the consumer as one string. Presumably the consumer parses it
  # itself -- confirm before changing the format.
  resolutions: "[(1024, 120, 600), (2048, 240, 1200), (4096, 480, 2400), (512, 50, 240)]"
  use_spectral_norm: false
  lReLU_slope: 0.2

# Logging / checkpoint settings.
log:
  info_interval: 100
  eval_interval: 1
  save_interval: 5
  num_audio: 6
  pth_dir: "chkpt"
  log_dir: "logs"
  keep_ckpts: 0

# Distributed-run settings.
dist_config:
  dist_backend: "nccl"
  dist_url: "tcp://localhost:54321"
  world_size: 1