warisqr7
/

accent-id-commonaccent_xlsr-en-english

Audio Classification

Accent Identification

Model card Files Files and versions

warisqr7 committed on Sep 9, 2024

Commit

9a56940

·

verified ·

1 Parent(s): 93fa313

Update custom_interface.py

Files changed (1) hide show

custom_interface.py +23 -6

custom_interface.py CHANGED Viewed

@@ -121,6 +121,28 @@ class CustomEncoderWav2vec2Classifier(Pretrained):
         text_lab = self.hparams.label_encoder.decode_torch(index)
         return out_prob, score, index, text_lab
     def classify_file(self, path):
         """Classifies the given audiofile into the given set of labels.
@@ -141,12 +163,7 @@ class CustomEncoderWav2vec2Classifier(Pretrained):
             List with the text labels corresponding to the indexes.
             (label encoder should be provided).
         """
-        waveform = self.load_audio(path)
-        # Fake a batch:
-        batch = waveform.unsqueeze(0)
-        rel_length = torch.tensor([1.0])
-        outputs = self.encode_batch(batch, rel_length)
-        outputs = self.mods.output_mlp(outputs).squeeze(1)
         out_prob = self.hparams.softmax(outputs)
         score, index = torch.max(out_prob, dim=-1)
         text_lab = self.hparams.label_encoder.decode_torch(index)

         text_lab = self.hparams.label_encoder.decode_torch(index)
         return out_prob, score, index, text_lab
+    def embed_file(self, path):
+        """Returns embedding (last layer output) for the given audiofile.
+        Arguments
+        ---------
+        path : str
+            Path to audio file to embed.
+        Returns
+        -------
+        embed
+            Output of the final MLP layer, before softmax is applied ([batch, embed_dim])
+        """
+        waveform = self.load_audio(path)
+        # Fake a batch:
+        batch = waveform.unsqueeze(0)
+        rel_length = torch.tensor([1.0])
+        outputs = self.encode_batch(batch, rel_length)
+        outputs = self.mods.output_mlp(outputs).squeeze(1)
+        return outputs
     def classify_file(self, path):
         """Classifies the given audiofile into the given set of labels.
             List with the text labels corresponding to the indexes.
             (label encoder should be provided).
         """
+        outputs = self.embed_file(path)
         out_prob = self.hparams.softmax(outputs)
         score, index = torch.max(out_prob, dim=-1)
         text_lab = self.hparams.label_encoder.decode_torch(index)