deveix committed on
Commit
39bd3a6
·
1 Parent(s): c589841

fix opensmile

Browse files
Files changed (1) hide show
  1. app/main.py +26 -18
app/main.py CHANGED
@@ -17,6 +17,7 @@ import pandas as pd
17
  import numpy as np
18
  import librosa.display
19
  import soundfile as sf
 
20
 
21
 
22
  load_dotenv()
@@ -198,32 +199,39 @@ def preprocess_audio(path, save_dir):
198
 
199
  return 'success'
200
 
 
 
 
 
201
 
202
  def extract_features(file_path):
203
- # Load the audio file
204
- y, sr = librosa.load(file_path, sr=None, dtype=np.float32)
 
 
 
 
205
 
206
- # Extract MFCCs
207
- mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
208
- mfccs_mean = pd.Series(mfccs.mean(axis=1), index=[f'mfcc_{i}' for i in range(mfccs.shape[0])])
 
 
209
 
210
- # Extract Spectral Features
211
- spectral_centroids = pd.Series(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)), index=['spectral_centroid'])
212
- spectral_rolloff = pd.Series(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)), index=['spectral_rolloff'])
213
- spectral_flux = pd.Series(np.mean(librosa.onset.onset_strength(y=y, sr=sr)), index=['spectral_flux'])
214
- spectral_contrast = pd.Series(np.mean(librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr), axis=1), index=[f'spectral_contrast_{i}' for i in range(librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr).shape[0])])
215
 
216
- # Extract Pitch
217
- pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
218
- pitch_mean = pd.Series(np.mean(pitches[pitches != 0]), index=['pitch_mean']) # Average only non-zero values
219
 
220
- # Extract Zero Crossings
221
- zero_crossings = pd.Series(np.mean(librosa.feature.zero_crossing_rate(y)), index=['zero_crossings'])
222
 
223
- # Combine all features into a single Series
224
- features = pd.concat([mfccs_mean, spectral_centroids, spectral_rolloff, spectral_flux, spectral_contrast, pitch_mean, zero_crossings])
225
 
226
- return pd.DataFrame([features])
227
 
228
 
229
  @app.post("/mlp")
 
17
  import numpy as np
18
  import librosa.display
19
  import soundfile as sf
20
+ import opensmile
21
 
22
 
23
  load_dotenv()
 
199
 
200
  return 'success'
201
 
202
# Shared openSMILE extractor, created once at module import so that every
# call to extract_features() reuses the same configured instance.
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    feature_level=opensmile.FeatureLevel.Functionals,
)


def extract_features(file_path):
    """Extract openSMILE ComParE_2016 functionals for one audio file.

    Args:
        file_path: Path of the audio file handed to ``smile.process_file``.

    Returns:
        The ``pandas.DataFrame`` produced by ``smile.process_file`` with its
        index replaced by a plain 0-based ``RangeIndex``.
    """
    features = smile.process_file(file_path)

    # The previous code did ``pd.DataFrame([pd.concat([features],
    # ignore_index=True)])``. Wrapping an existing DataFrame in a list makes
    # pandas try to build a 3-D array and fail ("Must pass 2-d input"), so
    # return the frame directly. ``reset_index(drop=True)`` yields the same
    # result the single-frame ``concat(..., ignore_index=True)`` intended.
    # NOTE(review): assumes downstream code wants a positional index rather
    # than openSMILE's own index - confirm against the /mlp handler.
    return features.reset_index(drop=True)
235
 
236
 
237
  @app.post("/mlp")