deveix committed on
Commit
754923b
·
1 Parent(s): 9dae67d
Files changed (1) hide show
  1. app/main.py +39 -36
app/main.py CHANGED
@@ -27,16 +27,16 @@ default_sample_rate=22050
27
  def load(file_name, skip_seconds=0):
28
  return librosa.load(file_name, sr=None, res_type='kaiser_fast')
29
 
30
- def preprocess_audio(audio_data, rate):
31
- # Apply preprocessing steps
32
- audio_data = nr.reduce_noise(y=audio_data, sr=rate)
33
- audio_data = librosa.util.normalize(audio_data)
34
- audio_data, _ = librosa.effects.trim(audio_data)
35
- audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
36
- # audio_data = fix_length(audio_data)
37
- rate = default_sample_rate
38
 
39
- return audio_data, rate
40
 
41
  def extract_features(X, sample_rate):
42
  # Generate Mel-frequency cepstral coefficients (MFCCs) from a time series
@@ -187,23 +187,26 @@ pca = joblib.load('app/pca.pkl')
187
  scaler = joblib.load('app/1713638595.3178492_scaler.joblib')
188
  label_encoder = joblib.load('app/1713638744.044928_label_encoder.joblib')
189
 
190
- # def preprocess_audio(path, save_dir):
191
- # y, sr = librosa.load(path)
192
-
193
- # # remove silence
194
- # intervals = librosa.effects.split(y, top_db=20)
195
- # # Concatenate non-silent intervals
196
- # y_no_gaps = np.concatenate([y[start:end] for start, end in intervals])
197
-
198
- # file_name_without_extension = os.path.basename(path).split('.')[0]
199
- # extension = os.path.basename(path).split('.')[1]
200
- # y_trimmed, _ = librosa.effects.trim(y_no_gaps, top_db = 20)
201
- # D = librosa.stft(y)
202
- # S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
203
- # S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128*2,)
204
- # S_db_mel = librosa.amplitude_to_db(np.abs(S), ref=np.max)
 
 
 
205
 
206
- # # Apply noise reduction (example using spectral subtraction)
207
  # y_denoised = librosa.effects.preemphasis(y_trimmed)
208
 
209
  # # Apply dynamic range compression
@@ -221,25 +224,25 @@ label_encoder = joblib.load('app/1713638744.044928_label_encoder.joblib')
221
  # # Define target sample rate
222
  # target_sr = sr
223
 
224
- # # # Data Augmentation (example: pitch shifting)
225
- # # y_pitch_shifted = librosa.effects.pitch_shift(y_normalized, sr=target_sr, n_steps=2)
226
 
227
 
228
- # # Split audio into non-silent intervals
229
 
230
 
231
- # # Normalize the audio signal
232
- # y_normalized = librosa.util.normalize(y_equalized)
233
 
234
- # # Feature Extraction (example: MFCCs)
235
- # # mfccs = librosa.feature.mfcc(y=y_normalized, sr=target_sr, n_mfcc=20)
236
 
237
- # # output_file_path = os.path.join(save_dir, f"{file_name_without_extension}.{extension}")
238
 
239
- # # Write the audio data to the output file in .wav format
240
- # sf.write(path, y_normalized, target_sr)
241
 
242
- # return 'success'
243
 
244
  # smile = opensmile.Smile(
245
  # feature_set=opensmile.FeatureSet.ComParE_2016,
 
27
  def load(file_name, skip_seconds=0):
28
  return librosa.load(file_name, sr=None, res_type='kaiser_fast')
29
 
30
+ # def preprocess_audio(audio_data, rate):
31
+ # # Apply preprocessing steps
32
+ # audio_data = nr.reduce_noise(y=audio_data, sr=rate)
33
+ # audio_data = librosa.util.normalize(audio_data)
34
+ # audio_data, _ = librosa.effects.trim(audio_data)
35
+ # audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
36
+ # # audio_data = fix_length(audio_data)
37
+ # rate = default_sample_rate
38
 
39
+ # return audio_data, rate
40
 
41
  def extract_features(X, sample_rate):
42
  # Generate Mel-frequency cepstral coefficients (MFCCs) from a time series
 
187
  scaler = joblib.load('app/1713638595.3178492_scaler.joblib')
188
  label_encoder = joblib.load('app/1713638744.044928_label_encoder.joblib')
189
 
190
+ def preprocess_audio(audio_data, rate):
191
+ audio_data = nr.reduce_noise(y=audio_data, sr=rate)
192
+ # remove silence
193
+ intervals = librosa.effects.split(audio_data, top_db=20)
194
+ # Concatenate non-silent intervals
195
+ audio_data = np.concatenate([audio_data[start:end] for start, end in intervals])
196
+
197
+ audio_data = librosa.util.normalize(audio_data)
198
+ audio_data, _ = librosa.effects.trim(audio_data)
199
+ audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
200
+ # audio_data = fix_length(audio_data)
201
+ rate = default_sample_rate
202
+
203
+ # y_trimmed, _ = librosa.effects.trim(y_no_gaps, top_db = 20)
204
+ # D = librosa.stft(y)
205
+ # S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
206
+ # S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128*2,)
207
+ # S_db_mel = librosa.amplitude_to_db(np.abs(S), ref=np.max)
208
 
209
+ # Apply noise reduction (example using spectral subtraction)
210
  # y_denoised = librosa.effects.preemphasis(y_trimmed)
211
 
212
  # # Apply dynamic range compression
 
224
  # # Define target sample rate
225
  # target_sr = sr
226
 
227
+ # # Data Augmentation (example: pitch shifting)
228
+ # y_pitch_shifted = librosa.effects.pitch_shift(y_normalized, sr=target_sr, n_steps=2)
229
 
230
 
231
+ # Split audio into non-silent intervals
232
 
233
 
234
+ # Normalize the audio signal
235
+ # y_normalized = librosa.util.normalize(y_equalized)
236
 
237
+ # Feature Extraction (example: MFCCs)
238
+ # mfccs = librosa.feature.mfcc(y=y_normalized, sr=target_sr, n_mfcc=20)
239
 
240
+ # output_file_path = os.path.join(save_dir, f"{file_name_without_extension}.{extension}")
241
 
242
+ # Write the audio data to the output file in .wav format
243
+ # sf.write(path, y_normalized, target_sr)
244
 
245
+ return audio_data, rate
246
 
247
  # smile = opensmile.Smile(
248
  # feature_set=opensmile.FeatureSet.ComParE_2016,