File size: 1,347 Bytes
6dacae5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import librosa
import numpy as np


def resample_audio(array, orig_sr, target_sr):
    """
    Resample an audio signal from one sampling rate to another.

    Args:
        array (array-like): Audio samples (list, tuple or NumPy array).
        orig_sr (int): Sampling rate of ``array``.
        target_sr (int): Sampling rate to convert to.

    Returns:
        np.ndarray: The samples at ``target_sr``. When ``orig_sr`` equals
        ``target_sr`` no resampling is done and a NumPy copy of the input
        is returned with its dtype unchanged.
    """
    # np.array copies by default, so the caller's buffer is never aliased.
    samples = np.array(array)

    if orig_sr == target_sr:
        return samples

    # librosa validates its input and rejects non-floating-point audio, so
    # integer samples (e.g. int16 PCM or a list of Python ints) are promoted
    # to float first. Values are cast as-is; no amplitude rescaling is done.
    if np.issubdtype(samples.dtype, np.integer):
        samples = samples.astype(np.float64)

    return librosa.resample(samples, orig_sr=orig_sr, target_sr=target_sr)


def create_mel_spectrogram(waveform, sr, n_mels=128, n_fft=2048, hop_length=512):
    """
    Compute a decibel-scaled Mel spectrogram of an audio waveform.

    The power Mel spectrogram is computed with librosa and then converted
    to decibels relative to its own maximum value (``ref=np.max``).

    Args:
        waveform (np.ndarray): 1D NumPy array of the audio waveform.
        sr (int): Sampling rate of the waveform.
        n_mels (int): Number of Mel bands to generate.
        n_fft (int): Length of the FFT window.
        hop_length (int): Number of samples between successive frames.

    Returns:
        np.ndarray: 2D NumPy array holding the Mel spectrogram in dB
        (shape: [n_mels, time]).
    """
    analysis_params = {
        "n_fft": n_fft,
        "hop_length": hop_length,
        "n_mels": n_mels,
    }

    # Power (amplitude squared) spectrogram on the Mel scale.
    power_spec = librosa.feature.melspectrogram(y=waveform, sr=sr, **analysis_params)

    # Log-compress: power -> decibels, referenced to the peak power.
    return librosa.power_to_db(power_spec, ref=np.max)