deveix committed on
Commit
3dddc6f
·
1 Parent(s): 491a059

change models

Browse files
Files changed (1) hide show
  1. app/main.py +57 -40
app/main.py CHANGED
@@ -182,66 +182,83 @@ async def get_answer(item: Item, token: str = Depends(verify_token)):
182
  raise HTTPException(status_code=500, detail=str(e))
183
 
184
  # random forest
185
- model = joblib.load('app/1713661391.0946255_trained_model.joblib')
186
  pca = joblib.load('app/pca.pkl')
187
- scaler = joblib.load('app/1713661464.8205004_scaler.joblib')
188
- label_encoder = joblib.load('app/1713661470.6730225_label_encoder.joblib')
189
 
190
  def preprocess_audio(audio_data, rate):
191
- audio_data = nr.reduce_noise(y=audio_data, sr=rate)
192
- # remove silence
193
- # intervals = librosa.effects.split(audio_data, top_db=20)
194
- # # Concatenate non-silent intervals
195
- # audio_data = np.concatenate([audio_data[start:end] for start, end in intervals])
196
 
197
- audio_data = librosa.util.normalize(audio_data)
198
  audio_data, _ = librosa.effects.trim(audio_data)
199
- audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
200
- rate = default_sample_rate
201
-
202
- # y_trimmed, _ = librosa.effects.trim(y_no_gaps, top_db = 20)
203
- # D = librosa.stft(y)
204
- # S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
205
- # S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128*2,)
206
- # S_db_mel = librosa.amplitude_to_db(np.abs(S), ref=np.max)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
- # Apply noise reduction (example using spectral subtraction)
209
- # y_denoised = librosa.effects.preemphasis(y_trimmed)
210
 
211
- # # Apply dynamic range compression
212
- # y_compressed = librosa.effects.preemphasis(y_denoised)
213
 
214
- # # Augmentation (example of time stretching)
215
- # # y_stretched = librosa.effects.time_stretch(y_compressed, rate=1.2)
216
 
217
- # # Silence Removal
218
- # y_silence_removed, _ = librosa.effects.trim(y_compressed)
219
 
220
- # # Equalization (example: apply high-pass filter)
221
- # y_equalized = librosa.effects.preemphasis(y_silence_removed)
222
 
223
- # # Define target sample rate
224
- # target_sr = sr
225
 
226
- # # Data Augmentation (example: pitch shifting)
227
- # y_pitch_shifted = librosa.effects.pitch_shift(y_normalized, sr=target_sr, n_steps=2)
228
 
229
 
230
- # Split audio into non-silent intervals
231
 
232
 
233
- # Normalize the audio signal
234
- # y_normalized = librosa.util.normalize(y_equalized)
235
 
236
- # Feature Extraction (example: MFCCs)
237
- # mfccs = librosa.feature.mfcc(y=y_normalized, sr=target_sr, n_mfcc=20)
238
 
239
- # output_file_path = os.path.join(save_dir, f"{file_name_without_extension}.{extension}")
240
 
241
- # Write the audio data to the output file in .wav format
242
- # sf.write(path, y_normalized, target_sr)
243
 
244
- return audio_data, rate
245
 
246
  # smile = opensmile.Smile(
247
  # feature_set=opensmile.FeatureSet.ComParE_2016,
 
182
  raise HTTPException(status_code=500, detail=str(e))
183
 
184
  # random forest
185
+ model = joblib.load('app/1713696933.326759_trained_model.joblib')
186
  pca = joblib.load('app/pca.pkl')
187
+ scaler = joblib.load('app/1713696947.894978_scaler.joblib')
188
+ label_encoder = joblib.load('app/1713696954.9487948_label_encoder.joblib')
189
 
190
  def preprocess_audio(audio_data, rate):
191
+ # Resample first if the target rate is lower to reduce data size for subsequent operations
192
+ if rate > default_sample_rate:
193
+ audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
194
+ rate = default_sample_rate
 
195
 
196
+ # Trim silence before applying computationally expensive noise reduction
197
  audio_data, _ = librosa.effects.trim(audio_data)
198
+
199
+ # Normalize the audio data
200
+ audio_data = librosa.util.normalize(audio_data)
201
+
202
+ # Apply noise reduction
203
+ audio_data = nr.reduce_noise(y=audio_data, sr=rate)
204
+
205
+ return audio_data, rate
206
+
207
+ # def preprocess_audio(audio_data, rate):
208
+ # audio_data = nr.reduce_noise(y=audio_data, sr=rate)
209
+ # # remove silence
210
+ # # intervals = librosa.effects.split(audio_data, top_db=20)
211
+ # # # Concatenate non-silent intervals
212
+ # # audio_data = np.concatenate([audio_data[start:end] for start, end in intervals])
213
+
214
+ # audio_data = librosa.util.normalize(audio_data)
215
+ # audio_data, _ = librosa.effects.trim(audio_data)
216
+ # audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
217
+ # rate = default_sample_rate
218
+
219
+ # # y_trimmed, _ = librosa.effects.trim(y_no_gaps, top_db = 20)
220
+ # # D = librosa.stft(y)
221
+ # # S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
222
+ # # S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128*2,)
223
+ # # S_db_mel = librosa.amplitude_to_db(np.abs(S), ref=np.max)
224
 
225
+ # # Apply noise reduction (example using spectral subtraction)
226
+ # # y_denoised = librosa.effects.preemphasis(y_trimmed)
227
 
228
+ # # # Apply dynamic range compression
229
+ # # y_compressed = librosa.effects.preemphasis(y_denoised)
230
 
231
+ # # # Augmentation (example of time stretching)
232
+ # # # y_stretched = librosa.effects.time_stretch(y_compressed, rate=1.2)
233
 
234
+ # # # Silence Removal
235
+ # # y_silence_removed, _ = librosa.effects.trim(y_compressed)
236
 
237
+ # # # Equalization (example: apply high-pass filter)
238
+ # # y_equalized = librosa.effects.preemphasis(y_silence_removed)
239
 
240
+ # # # Define target sample rate
241
+ # # target_sr = sr
242
 
243
+ # # # Data Augmentation (example: pitch shifting)
244
+ # # y_pitch_shifted = librosa.effects.pitch_shift(y_normalized, sr=target_sr, n_steps=2)
245
 
246
 
247
+ # # Split audio into non-silent intervals
248
 
249
 
250
+ # # Normalize the audio signal
251
+ # # y_normalized = librosa.util.normalize(y_equalized)
252
 
253
+ # # Feature Extraction (example: MFCCs)
254
+ # # mfccs = librosa.feature.mfcc(y=y_normalized, sr=target_sr, n_mfcc=20)
255
 
256
+ # # output_file_path = os.path.join(save_dir, f"{file_name_without_extension}.{extension}")
257
 
258
+ # # Write the audio data to the output file in .wav format
259
+ # # sf.write(path, y_normalized, target_sr)
260
 
261
+ # return audio_data, rate
262
 
263
  # smile = opensmile.Smile(
264
  # feature_set=opensmile.FeatureSet.ComParE_2016,