File size: 4,232 Bytes
381c43b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import librosa
import numpy as np


class SpectralFeatureExtractor:
    """
    Extract spectral features from audio data using the librosa library.

    The librosa-backed feature methods return their result flattened to a
    1-D array with ``ndarray.flatten()`` (row-major order), so features that
    librosa reports with several rows per frame come back as one long vector.

    Attributes:
        y (numpy.ndarray): Audio time series.
        sr (int): Sampling rate of the audio time series.

    Methods:
        extract(features_to_extract=None): Extracts specified spectral features from audio.
        spectral_centroid(): Computes the spectral centroid of the audio.
        spectral_bandwidth(): Computes the spectral bandwidth of the audio.
        spectral_contrast(): Computes the spectral contrast of the audio.
        spectral_flatness(): Computes the spectral flatness of the audio.
        spectral_rolloff(): Computes the spectral rolloff of the audio.
        zero_crossing_rate(): Computes the zero crossing rate of the audio.
        mfccs(n_mfcc=13): Computes the Mel-frequency cepstral coefficients (MFCCs) of the audio.
        chroma_stft(): Computes the chromagram of the audio.
        spectral_flux(): Computes the spectral flux of the audio.
    """

    def __init__(self, y, sr):
        """
        Initializes the SpectralFeatureExtractor with audio data.

        Args:
            y (numpy.ndarray): Audio time series.
            sr (int): Sampling rate of ``y``.
        """
        self.y = y
        self.sr = sr

    def extract(self, features_to_extract=None):
        """
        Extracts the specified spectral features.

        Args:
            features_to_extract (str or iterable of str, optional): Feature
                names to extract. A single name may be given as a plain
                string. Defaults to extracting all available features if None.
                Names that are not recognised are skipped.

        Returns:
            dict: Maps each recognised requested feature name to the value
                returned by the method of the same name.
        """
        feature_funcs = {
            'spectral_centroid': self.spectral_centroid,
            'spectral_bandwidth': self.spectral_bandwidth,
            'spectral_contrast': self.spectral_contrast,
            'spectral_flatness': self.spectral_flatness,
            'spectral_rolloff': self.spectral_rolloff,
            'zero_crossing_rate': self.zero_crossing_rate,
            'mfccs': self.mfccs,
            'chroma_stft': self.chroma_stft,
            'spectral_flux': self.spectral_flux,
        }

        if features_to_extract is None:
            requested = feature_funcs
        elif isinstance(features_to_extract, str):
            # A bare string would otherwise be iterated character by
            # character, match nothing, and silently yield an empty result.
            requested = (features_to_extract,)
        else:
            requested = features_to_extract

        features = {}
        for feature in requested:
            # Unknown names are deliberately ignored rather than raising.
            if feature in feature_funcs:
                features[feature] = feature_funcs[feature]()
        return features

    def spectral_centroid(self):
        """
        Computes the spectral centroid of the audio.

        Returns:
            numpy.ndarray: Flattened output of
                ``librosa.feature.spectral_centroid``.
        """
        return librosa.feature.spectral_centroid(y=self.y, sr=self.sr).flatten()

    def spectral_bandwidth(self):
        """
        Computes the spectral bandwidth of the audio.

        Returns:
            numpy.ndarray: Flattened output of
                ``librosa.feature.spectral_bandwidth``.
        """
        return librosa.feature.spectral_bandwidth(y=self.y, sr=self.sr).flatten()

    def spectral_contrast(self):
        """
        Computes the spectral contrast of the audio.

        Returns:
            numpy.ndarray: Flattened output of
                ``librosa.feature.spectral_contrast``.
        """
        return librosa.feature.spectral_contrast(y=self.y, sr=self.sr).flatten()

    def spectral_flatness(self):
        """
        Computes the spectral flatness of the audio.

        Returns:
            numpy.ndarray: Flattened output of
                ``librosa.feature.spectral_flatness``.
        """
        return librosa.feature.spectral_flatness(y=self.y).flatten()

    def spectral_rolloff(self):
        """
        Computes the spectral rolloff point of the audio.

        Returns:
            numpy.ndarray: Flattened output of
                ``librosa.feature.spectral_rolloff``.
        """
        return librosa.feature.spectral_rolloff(y=self.y, sr=self.sr).flatten()

    def zero_crossing_rate(self):
        """
        Computes the zero crossing rate of the audio.

        Returns:
            numpy.ndarray: Flattened output of
                ``librosa.feature.zero_crossing_rate``.
        """
        return librosa.feature.zero_crossing_rate(self.y).flatten()

    def mfccs(self, n_mfcc=13):
        """
        Computes the Mel-frequency cepstral coefficients (MFCCs) of the audio.

        Args:
            n_mfcc (int, optional): Number of coefficients requested from
                ``librosa.feature.mfcc``. Defaults to 13.

        Returns:
            numpy.ndarray: Flattened output of ``librosa.feature.mfcc``.
        """
        return librosa.feature.mfcc(y=self.y, sr=self.sr, n_mfcc=n_mfcc).flatten()

    def chroma_stft(self):
        """
        Computes the chromagram of the audio.

        Returns:
            numpy.ndarray: Flattened output of ``librosa.feature.chroma_stft``.
        """
        return librosa.feature.chroma_stft(y=self.y, sr=self.sr).flatten()

    def spectral_flux(self):
        """
        Computes the spectral flux of the audio.

        The flux is the Euclidean norm, taken over the first axis of the STFT
        magnitude, of the difference between consecutive columns along axis 1.

        Returns:
            numpy.ndarray: One value per pair of adjacent STFT columns, i.e.
                one fewer than the number of columns, because ``np.diff``
                shortens axis 1 by one.
        """
        magnitude = np.abs(librosa.stft(self.y))
        # Difference between each column and its predecessor.
        frame_delta = np.diff(magnitude, axis=1)
        return np.sqrt(np.sum(frame_delta ** 2, axis=0))