import librosa
import numpy as np


def resample_audio(array, orig_sr: int, target_sr: int) -> np.ndarray:
    """Return the audio samples as a NumPy array at the target sampling rate.

    Args:
        array: Audio samples; anything ``np.array`` accepts (list, ndarray, ...).
        orig_sr: Sampling rate the samples currently have.
        target_sr: Sampling rate the returned samples should have.

    Returns:
        A NumPy array of the samples. When ``orig_sr`` equals ``target_sr`` the
        samples are returned unchanged (as a fresh array); otherwise they are
        passed through ``librosa.resample``.
    """
    # np.array (not asarray) so the caller's buffer is never aliased, even
    # when no resampling is needed.
    array = np.array(array)
    if orig_sr != target_sr:
        array = librosa.resample(array, orig_sr=orig_sr, target_sr=target_sr)
    return array


def create_mel_spectrogram(
    waveform: np.ndarray,
    sr: int,
    n_mels: int = 128,
    n_fft: int = 2048,
    hop_length: int = 512,
) -> np.ndarray:
    """Create a log-scaled (dB) Mel spectrogram from a waveform.

    Args:
        waveform: 1D NumPy array of the audio waveform.
        sr: Sampling rate of the waveform.
        n_mels: Number of Mel bands to generate.
        n_fft: Length of the FFT window.
        hop_length: Number of samples between successive frames.

    Returns:
        2D NumPy array of the Mel spectrogram in decibels
        (shape: [n_mels, time]).
    """
    mel_spectrogram = librosa.feature.melspectrogram(
        y=waveform,
        sr=sr,
        n_fft=n_fft,
        hop_length=hop_length,
        n_mels=n_mels,
    )

    # Convert the power spectrogram (amplitude squared) to decibels, using the
    # spectrogram's own maximum as the reference level.
    mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # NOTE: the number of time frames depends on the input length. Callers
    # that need a fixed width must pad or truncate the result themselves
    # (e.g. with librosa.util.fix_length).
    return mel_spectrogram_db