# Name of the model this configuration describes (presumably used by the
# training code to select the model class - TODO confirm).
model_name: "cnn_vad"
# spec
# Settings for the spectral front end. The notes below assume sample_rate is
# in Hz and win_size / hop_size are in samples - TODO confirm against the
# code that consumes this file.
sample_rate: 8000
# With nfft: 512, a one-sided FFT would yield 257 bins (nfft / 2 + 1).
nfft: 512
# 240 samples at 8000 Hz would be a 30 ms analysis window.
win_size: 240
# 80 samples at 8000 Hz would be a 10 ms hop between frames.
hop_size: 80
win_type: hann
# model
# Stack of three 2-D convolution blocks, presumably applied in the listed
# order. Each block's in_channels equals the previous block's out_channels
# (1 -> 8 -> 8 -> 8). The keys look like torch.nn.Conv2d arguments plus
# per-block extras (batch_norm, activation, dropout) - TODO confirm against
# the model code.
conv2d_block_param_list:
  # Only this first block sets batch_norm; the other two omit the key, so
  # they get whatever default the consumer applies.
  - batch_norm: true
    in_channels: 1
    out_channels: 8
    kernel_size: 3
    padding: "same"
    dilation: 3
    activation: relu
    dropout: 0.1
  - in_channels: 8
    out_channels: 8
    kernel_size: 5
    padding: "same"
    dilation: 3
    activation: relu
    dropout: 0.1
  - in_channels: 8
    out_channels: 8
    kernel_size: 3
    padding: "same"
    dilation: 2
    activation: relu
    dropout: 0.1
# 2056 = 8 * 257, i.e. the last block's out_channels times (nfft / 2 + 1) for
# nfft: 512. Presumably this must be updated together with those values -
# verify against the model code.
encoder_output_size: 2056
# lsnr
# Local-SNR settings ("lsnr" presumably abbreviates local SNR, matching the
# *_local_snr_db keys; bounds are in dB per the key names).
# NOTE(review): n_frame and norm_tau semantics are not visible here - confirm
# against the code that reads them.
n_frame: 3
min_local_snr_db: -15
max_local_snr_db: 30
# Written as 1.0 rather than "1." so it reads unambiguously as a float.
norm_tau: 1.0
# data
# SNR bounds in dB (per the key names). Presumably the range used when mixing
# noise into speech for training data - TODO confirm against the data
# pipeline.
min_snr_db: -10
max_snr_db: 20
# train
# Learning rate (presumably the optimizer's starting value, which the
# scheduler then adjusts - TODO confirm).
lr: 0.001
# The scheduler name matches torch.optim.lr_scheduler.CosineAnnealingLR, whose
# constructor takes T_max and eta_min. The kwargs mapping below is presumably
# forwarded to it unchanged - TODO confirm how the trainer resolves the name.
lr_scheduler: "CosineAnnealingLR"
lr_scheduler_kwargs:
  # 250000 is 10 * eval_steps. Whether it counts optimizer steps or epochs
  # depends on how often the trainer steps the scheduler - verify.
  T_max: 250000
  # One tenth of lr (0.001).
  eta_min: 0.0001
max_epochs: 100
clip_grad_norm: 10.0
seed: 1234
num_workers: 4
batch_size: 128
eval_steps: 25000