deveix committed on
Commit
5da4449
·
1 Parent(s): a1b9bc0

fix requirements

Browse files
Files changed (2) hide show
  1. app/main.py +2 -89
  2. requirements.txt +2 -2
app/main.py CHANGED
@@ -340,88 +340,8 @@ def preprocess_audio(audio_data, rate):
340
  audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
341
  rate = default_sample_rate
342
 
343
- # y_trimmed, _ = librosa.effects.trim(y_no_gaps, top_db = 20)
344
- # D = librosa.stft(y)
345
- # S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
346
- # S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128*2,)
347
- # S_db_mel = librosa.amplitude_to_db(np.abs(S), ref=np.max)
348
-
349
- # Apply noise reduction (example using spectral subtraction)
350
- # y_denoised = librosa.effects.preemphasis(y_trimmed)
351
-
352
- # # Apply dynamic range compression
353
- # y_compressed = librosa.effects.preemphasis(y_denoised)
354
-
355
- # # Augmentation (example of time stretching)
356
- # # y_stretched = librosa.effects.time_stretch(y_compressed, rate=1.2)
357
-
358
- # # Silence Removal
359
- # y_silence_removed, _ = librosa.effects.trim(y_compressed)
360
-
361
- # # Equalization (example: apply high-pass filter)
362
- # y_equalized = librosa.effects.preemphasis(y_silence_removed)
363
-
364
- # # Define target sample rate
365
- # target_sr = sr
366
-
367
- # # Data Augmentation (example: pitch shifting)
368
- # y_pitch_shifted = librosa.effects.pitch_shift(y_normalized, sr=target_sr, n_steps=2)
369
-
370
-
371
- # Split audio into non-silent intervals
372
-
373
-
374
- # Normalize the audio signal
375
- # y_normalized = librosa.util.normalize(y_equalized)
376
-
377
- # Feature Extraction (example: MFCCs)
378
- # mfccs = librosa.feature.mfcc(y=y_normalized, sr=target_sr, n_mfcc=20)
379
-
380
- # output_file_path = os.path.join(save_dir, f"{file_name_without_extension}.{extension}")
381
-
382
- # Write the audio data to the output file in .wav format
383
- # sf.write(path, y_normalized, target_sr)
384
-
385
  return audio_data, rate
386
 
387
- # smile = opensmile.Smile(
388
- # feature_set=opensmile.FeatureSet.ComParE_2016,
389
- # feature_level=opensmile.FeatureLevel.Functionals,
390
- # )
391
-
392
- # def extract_features(file_path):
393
- # # # Load the audio file
394
- # # y, sr = librosa.load(file_path, sr=None, dtype=np.float32)
395
-
396
- # # # Extract MFCCs
397
- # # mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
398
- # # mfccs_mean = pd.Series(mfccs.mean(axis=1), index=[f'mfcc_{i}' for i in range(mfccs.shape[0])])
399
-
400
- # # # Extract Spectral Features
401
- # # spectral_centroids = pd.Series(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)), index=['spectral_centroid'])
402
- # # spectral_rolloff = pd.Series(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)), index=['spectral_rolloff'])
403
- # # spectral_flux = pd.Series(np.mean(librosa.onset.onset_strength(y=y, sr=sr)), index=['spectral_flux'])
404
- # # spectral_contrast = pd.Series(np.mean(librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr), axis=1), index=[f'spectral_contrast_{i}' for i in range(librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr).shape[0])])
405
-
406
- # # # Extract Pitch
407
- # # pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
408
- # # pitch_mean = pd.Series(np.mean(pitches[pitches != 0]), index=['pitch_mean']) # Average only non-zero values
409
-
410
- # # # Extract Zero Crossings
411
- # # zero_crossings = pd.Series(np.mean(librosa.feature.zero_crossing_rate(y)), index=['zero_crossings'])
412
-
413
- # # # Combine all features into a single Series
414
- # # features = pd.concat([mfccs_mean, spectral_centroids, spectral_rolloff, spectral_flux, spectral_contrast, pitch_mean, zero_crossings])
415
-
416
- # features = smile.process_file(file_path)
417
- # features_reshaped = features.squeeze()
418
-
419
- # # Ensure it's now a 2D structure suitable for DataFrame
420
- # print("New shape of features:", features_reshaped.shape)
421
-
422
- # all_data = pd.DataFrame([features_reshaped])
423
- # return all_data
424
-
425
  def repair_mp3_with_ffmpeg_python(input_path, output_path):
426
  """Attempt to repair an MP3 file using FFmpeg."""
427
  try:
@@ -469,13 +389,6 @@ async def handle_audio(file: UploadFile = File(...)):
469
  # Extract features
470
  features = extract_features(audio_data, sr)
471
 
472
- # preprocess_audio(temp_filename, 'app')
473
- # repair_mp3_with_ffmpeg_python(temp_filename, temp_filename)
474
- # # Here you would add the feature extraction logic
475
- # features = extract_features(temp_filename)
476
- # print("Extracted Features:", features)
477
- # features = pca.transform(features)
478
- # features = np.array(features).reshape(1, -1)
479
  features = features.reshape(1, -1)
480
 
481
  features = scaler.transform(features)
@@ -484,10 +397,10 @@ async def handle_audio(file: UploadFile = File(...)):
484
  results = model.predict(features)
485
  # decoded_predictions = [label_encoder.classes_[i] for i in results]
486
 
487
- # # Decode the predictions using the label encoder
488
  decoded_predictions = label_encoder.inverse_transform(results)
489
  print('decoded', decoded_predictions[0])
490
- # .tolist()
491
  # Clean up the temporary file
492
  os.remove(temp_filename)
493
  print({"message": "File processed successfully", "sheikh": decoded_predictions[0]})
 
340
  audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
341
  rate = default_sample_rate
342
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
  return audio_data, rate
344
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  def repair_mp3_with_ffmpeg_python(input_path, output_path):
346
  """Attempt to repair an MP3 file using FFmpeg."""
347
  try:
 
389
  # Extract features
390
  features = extract_features(audio_data, sr)
391
 
 
 
 
 
 
 
 
392
  features = features.reshape(1, -1)
393
 
394
  features = scaler.transform(features)
 
397
  results = model.predict(features)
398
  # decoded_predictions = [label_encoder.classes_[i] for i in results]
399
 
400
+ # Decode the predictions using the label encoder
401
  decoded_predictions = label_encoder.inverse_transform(results)
402
  print('decoded', decoded_predictions[0])
403
+
404
  # Clean up the temporary file
405
  os.remove(temp_filename)
406
  print({"message": "File processed successfully", "sheikh": decoded_predictions[0]})
requirements.txt CHANGED
@@ -9,6 +9,7 @@ pypdf==4.0.2
9
  pymongo>=3.11
10
  tiktoken==0.6.0
11
  langchain-openai==0.0.8
 
12
  python-dotenv
13
  upstash-redis
14
  librosa
@@ -19,5 +20,4 @@ matplotlib
19
  python-multipart
20
  ffmpeg-python
21
  noisereduce
22
- scikit-learn==1.2.2
23
- tensorflow
 
9
  pymongo>=3.11
10
  tiktoken==0.6.0
11
  langchain-openai==0.0.8
12
+ tensorflow
13
  python-dotenv
14
  upstash-redis
15
  librosa
 
20
  python-multipart
21
  ffmpeg-python
22
  noisereduce
23
+ scikit-learn==1.2.2