File size: 2,234 Bytes
bba3e85 b80774a bba3e85 b80774a bba3e85 b80774a bba3e85 b80774a bba3e85 b80774a bba3e85 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
############################# Inference ###################################################
# #################################
# Basic inference parameters for spoken language identification. First, a
# network computes embeddings; on top of that, we employ a classifier.
#
# Author:
# * Mirco Ravanelli 2021
# * Kunnar Kukk 2022
# #################################
# Pretrain folder: prefix of every checkpoint path listed under
# `pretrainer.paths`.
pretrained_path: TalTechNLP/voxlingua107-xls-r-300m-wav2vec

# Model parameters
# NOTE(review): sample_rate is presumably in Hz — confirm against the consumer.
sample_rate: 16000
device: 'cpu'

# Feature extraction
# `!new:` instantiates the tagged class; the indented mapping below it supplies
# the constructor's keyword arguments, so `n_mels` has to be nested here rather
# than left as a separate top-level key.
compute_features: !new:speechbrain.lobes.features.Fbank
  n_mels: 60
######################## Wav2Vec ########################
# Identifier of the wav2vec2 model; handed to the encoder as `source` below.
wav2vec2_hub: facebook/wav2vec2-xls-r-300m
# Handed to the encoder as `freeze` below.
freeze_wav2vec: true
# Base folder for `save_path` below.
save_folder: ./save

# wav2vec2 encoder. The nested keys are the constructor's keyword arguments;
# they must be indented under `wav2vec2`, otherwise the `!new:` node is empty
# and the arguments become unrelated top-level keys.
wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
  source: !ref <wav2vec2_hub>
  output_norm: true
  freeze: !ref <freeze_wav2vec>
  save_path: !ref <save_folder>/wav2vec2_checkpoint
# Number of classifier outputs; referenced by `classifier.out_neurons`.
out_neurons: 107

# Classification head. Its arguments are nested so that the inner
# `out_neurons` no longer duplicates the top-level key of the same name.
classifier: !new:speechbrain.lobes.models.Xvector.Classifier
  # NOTE(review): 2048 is presumably 2 x the pooling `channels` (1024) —
  # confirm against the pooling layer's output size.
  input_shape: [null, null, 2048]
  # `!name:` passes the callable itself instead of an instance.
  activation: !name:torch.nn.LeakyReLU
  lin_blocks: 1
  lin_neurons: 512
  out_neurons: !ref <out_neurons>
# Label encoder, constructed without arguments. It is listed in
# `pretrainer.loadables`, with `label_encoder.txt` as its path.
label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder

# Pooling layer. `channels` and `attention_channels` are its constructor
# keyword arguments and therefore belong underneath it.
attentive: !new:speechbrain.lobes.models.ECAPA_TDNN.AttentiveStatisticsPooling
  channels: 1024
  attention_channels: 64
# Mapping of component name -> object. Every entry is a `!ref` to the top-level
# key of the same name, so the entries must live under `modules`; at top level
# they would redefine those keys as references to themselves.
modules:
  wav2vec2: !ref <wav2vec2>
  compute_features: !ref <compute_features>
  classifier: !ref <classifier>
  attentive: !ref <attentive>
  softmax: !ref <softmax>
# Container holding the pooling layer and the classifier; it is the object
# that `pretrainer.loadables.model` refers to. A sequence under `!new:`
# supplies positional arguments, so ModuleList receives a single list.
model: !new:torch.nn.ModuleList
  - [!ref <attentive>, !ref <classifier>]
# Pretrainer configuration. `loadables` maps a name to the object to be
# loaded; `paths` maps the same name to its file under `pretrained_path`.
# Both mappings, and their entries, must be nested: left flat, the entries
# collide with each other and with the top-level objects they reference.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    wav2vec2: !ref <wav2vec2>
    classifier: !ref <classifier>
    label_encoder: !ref <label_encoder>
    model: !ref <model>
  paths:
    wav2vec2: !ref <pretrained_path>/wav2vec2.ckpt
    classifier: !ref <pretrained_path>/classifier.ckpt
    label_encoder: !ref <pretrained_path>/label_encoder.txt
    model: !ref <pretrained_path>/model.ckpt
##################

# Output activation referenced by `modules.softmax`.
# NOTE(review): `apply_log: true` presumably selects log-probabilities —
# confirm against the Softmax class documentation.
softmax: !new:speechbrain.nnet.activations.Softmax
  apply_log: true