---
# Configuration with six top-level keys: data, device, env, loss, model, train.
# NOTE(review): the nesting below follows the alphabetical key grouping of the
# flat key listing; confirm it against the code that loads this file.

data:
  block_size: 512
  duration: 1.5
  encoder: dpwavlmbase
  encoder_ckpt: models/pretrained/dphubert/DPWavLM-sp0.75.pth
  encoder_hop_size: 320
  encoder_out_channels: 768
  encoder_sample_rate: 16000
  extensions:
    - wav
    - flac
    - mp3
    - m4a
  f0_extractor: rmvpe
  # presumably Hz bounds for the f0 extractor -- TODO confirm units
  f0_max: 1200
  f0_min: 65
  sampling_rate: 44100
  spk_embed_channels: 256
  spk_embed_encoder: pyannote.audio
  spk_embed_encoder_ckpt: ./models/pretrained/pyannote.audio/wespeaker-voxceleb-resnet34-LM/pytorch_model.bin
  spk_embed_encoder_sample_rate: 16000
  volume_window_size: 8

device: cuda

env:
  gpu_id: 0

loss:
  beta: 1.0
  fft_max: 2048
  fft_min: 128
  gamma: 0.0
  n_ffts:
    - 32
    - 64
    - 128
    - 256
    - 512
    - 1024
    - 2048
  overlap: 0.5
  use_multi_scale_log_freq: true

model:
  f0_input_variance: 0.0
  f0_offset_size_downsamples: 16
  harmonic_env_size_downsamples: 16
  no_use_embed_conv: false
  noise_env_size_downsamples: 16
  noise_seed: 289
  noise_to_harmonic_phase: false
  type: CombSubMinimumNoisedPhase
  units_hidden_channels: 256
  # A list holding one inner list, i.e. [[10, 11]].
  # NOTE(review): verify the inner grouping; [[10], 11] is the other reading.
  units_layers:
    - - 10
      - 11
  use_add_noise_env: false
  use_discriminator: true
  use_f0_offset: false
  use_harmonic_env: true
  use_noise_env: false
  use_speaker_embed: true
  win_length: 2048

train:
  accelerator:
    log_with: tensorboard
  accelerator_project_config:
    total_limit: 10
  allow_tf32: true
  amp_dtype: fp32
  batch_size: 32
  cache_all_data: true
  cache_device: cpu
  cache_fp16: true
  epochs: 100
  frame_hop_random_max: 64
  frame_hop_random_min: 32
  interval_log: 10
  interval_val: 2000
  loss_variation: 0.1
  low_similar_loss_variation: 0.7
  lr: 0.0003
  num_workers: 2
  only_u2c_stack: false
  save_states: true
  # sched_* keys: presumably learning-rate scheduler settings -- TODO confirm
  sched_cooldown: 2
  sched_factor: 0.5
  sched_gamma: 0.99999
  sched_min_lr: 1.0e-05
  sched_patience: 50
  sched_threshold: 1.0e-05
  sched_threshold_mode: rel
  weight_decay: 1.0e-05