TalTechNLP
/

voxlingua107-xls-r-300m-wav2vec

Audio Classification

language-identification

Model card Files Files and versions

kunnark committed on Apr 14, 2022

Commit

bba3e85

·

1 Parent(s): de2e05d

First model commit.

Files changed (1) hide show

inference_wav2vec.yaml +75 -0

inference_wav2vec.yaml ADDED Viewed

	@@ -0,0 +1,75 @@

+############################# Inference ###################################################
+# #################################
+# Basic inference parameters for language identification. We first have a
+# network that computes some embeddings. On top of that, we employ a classifier.
+#
+# Author:
+#  * Mirco Ravanelli 2021
+#  * Kunnar Kukk 2022
+# #################################
+# Pretrained-file folder: base path substituted into the pretrainer `paths` entries.
+pretrained_path: ./
+# Model parameters (neither key is the target of a `!ref` in the visible part of this file)
+sample_rate: 16000  # NOTE(review): presumably Hz — confirm
+device: 'cpu'
+# Feature extraction: Fbank instance, exposed under `modules.compute_features`.
+compute_features: !new:speechbrain.lobes.features.Fbank
+    n_mels: 60  # NOTE(review): presumably the number of mel filters — confirm against the Fbank docs
+######################## Wav2Vec ########################
+# Hub identifier passed as `source` to the wav2vec2 wrapper. NOTE(review): the repo name says xls-r-300m but this points at xlsr-53 — confirm this is intended.
+wav2vec2_hub: facebook/wav2vec2-large-xlsr-53
+freeze_wav2vec: True # False (value is forwarded to the wrapper's `freeze` argument)
+save_folder: ./save  # parent folder of the wrapper's `save_path`
+wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
+    source: !ref <wav2vec2_hub>
+    output_norm: True
+    freeze: !ref <freeze_wav2vec>
+    save_path: !ref <save_folder>/wav2vec2_checkpoint  # resolves to ./save/wav2vec2_checkpoint
+out_neurons: 107  # classifier output size; NOTE(review): presumably the 107 VoxLingua107 languages — confirm
+classifier: !new:speechbrain.lobes.models.Xvector.Classifier
+    input_shape: [null, null, 2048]  # NOTE(review): 2048 looks like 2 x the pooling `channels` (1024) — confirm
+    activation: !name:torch.nn.LeakyReLU
+    lin_blocks: 1
+    lin_neurons: 512
+    out_neurons: !ref <out_neurons>
+label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder  # listed in the pretrainer `loadables` (file: label_encoder.txt)
+attentive: !new:speechbrain.lobes.models.ECAPA_TDNN.AttentiveStatisticsPooling
+    channels: 1024  # NOTE(review): presumably the wav2vec2 feature width — confirm
+    attention_channels: 64
+modules:  # maps names to objects declared elsewhere in this file
+    wav2vec2: !ref <wav2vec2> # Added
+    compute_features: !ref <compute_features>
+    classifier: !ref <classifier>
+    attentive: !ref <attentive> # Added
+    softmax: !ref <softmax> # Added; `softmax` is declared at the end of this file
+model: !new:torch.nn.ModuleList  # groups the pooling layer and classifier; the pretrainer lists it with `model.ckpt`
+   - [!ref <attentive>, !ref <classifier>]
+pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer  # pairs each object below with the file holding its saved state
+    loadables:  # name -> object declared earlier in this file
+        wav2vec2: !ref <wav2vec2>
+        classifier: !ref <classifier>
+        label_encoder: !ref <label_encoder>
+        model: !ref <model>
+    paths:  # name -> file path; keys match `loadables`
+        wav2vec2: !ref <pretrained_path>/wav2vec2.ckpt
+        classifier: !ref <pretrained_path>/classifier.ckpt
+        label_encoder: !ref <pretrained_path>/label_encoder.txt  # `!ref` is required, otherwise `<pretrained_path>` stays a literal string
+        model: !ref <pretrained_path>/model.ckpt
+################## Output activation (referenced from `modules.softmax`)
+softmax: !new:speechbrain.nnet.activations.Softmax
+    apply_log: True  # NOTE(review): presumably makes the output log-probabilities — confirm