Spaces:

ahmedkasem
/

quran-nlp

Sleeping

App Files Files Community

deveix committed on Apr 20, 2024

Commit

8faa556

1 Parent(s): e0568c1

update prediction

Browse files

Files changed (3) hide show

app/1713630229.4965415_trained_model.joblib +0 -0
app/main.py +133 -82
requirements.txt +2 -1

app/1713630229.4965415_trained_model.joblib ADDED Viewed

Binary file (16.6 kB). View file

app/main.py CHANGED Viewed

@@ -20,6 +20,48 @@ import soundfile as sf
 import opensmile
 import ffmpeg
 load_dotenv()
@@ -140,103 +182,103 @@ async def get_answer(item: Item, token: str = Depends(verify_token)):
         # If there's an error, return a 500 error with the error's details
         raise HTTPException(status_code=500, detail=str(e))
-# mlp
-mlp_model = joblib.load('app/mlp_model.pkl')
-mlp_pca = joblib.load('app/pca.pkl')
-mlp_scaler = joblib.load('app/scaler.pkl')
-mlp_label_encoder = joblib.load('app/label_encoder.pkl')
-def preprocess_audio(path, save_dir):
-    y, sr = librosa.load(path)
-    # remove silence
-    intervals = librosa.effects.split(y, top_db=20)
-    # Concatenate non-silent intervals
-    y_no_gaps = np.concatenate([y[start:end] for start, end in intervals])
-    file_name_without_extension = os.path.basename(path).split('.')[0]
-    extension = os.path.basename(path).split('.')[1]
-    y_trimmed, _ = librosa.effects.trim(y_no_gaps, top_db = 20)
-    D = librosa.stft(y)
-    S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
-    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128*2,)
-    S_db_mel = librosa.amplitude_to_db(np.abs(S), ref=np.max)
-    # Apply noise reduction (example using spectral subtraction)
-    y_denoised = librosa.effects.preemphasis(y_trimmed)
-    # Apply dynamic range compression
-    y_compressed = librosa.effects.preemphasis(y_denoised)
-    # Augmentation (example of time stretching)
-#     y_stretched = librosa.effects.time_stretch(y_compressed, rate=1.2)
-    # Silence Removal
-    y_silence_removed, _ = librosa.effects.trim(y_compressed)
-    # Equalization (example: apply high-pass filter)
-    y_equalized = librosa.effects.preemphasis(y_silence_removed)
-    # Define target sample rate
-    target_sr = sr
-#     # Data Augmentation (example: pitch shifting)
-#     y_pitch_shifted = librosa.effects.pitch_shift(y_normalized, sr=target_sr, n_steps=2)
-    # Split audio into non-silent intervals
-    # Normalize the audio signal
-    y_normalized = librosa.util.normalize(y_equalized)
-    # Feature Extraction (example: MFCCs)
-#     mfccs = librosa.feature.mfcc(y=y_normalized, sr=target_sr, n_mfcc=20)
-    # output_file_path = os.path.join(save_dir, f"{file_name_without_extension}.{extension}")
-    # Write the audio data to the output file in .wav format
-    sf.write(path, y_normalized, target_sr)
-    return 'success'
-smile = opensmile.Smile(
-    feature_set=opensmile.FeatureSet.ComParE_2016,
-    feature_level=opensmile.FeatureLevel.Functionals,
-)
-def extract_features(file_path):
-    # # Load the audio file
-    # y, sr = librosa.load(file_path, sr=None, dtype=np.float32)
-    # # Extract MFCCs
-    # mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
-    # mfccs_mean = pd.Series(mfccs.mean(axis=1), index=[f'mfcc_{i}' for i in range(mfccs.shape[0])])
-    # # Extract Spectral Features
-    # spectral_centroids = pd.Series(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)), index=['spectral_centroid'])
-    # spectral_rolloff = pd.Series(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)), index=['spectral_rolloff'])
-    # spectral_flux = pd.Series(np.mean(librosa.onset.onset_strength(y=y, sr=sr)), index=['spectral_flux'])
-    # spectral_contrast = pd.Series(np.mean(librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr), axis=1), index=[f'spectral_contrast_{i}' for i in range(librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr).shape[0])])
-    # # Extract Pitch
-    # pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
-    # pitch_mean = pd.Series(np.mean(pitches[pitches != 0]), index=['pitch_mean'])  # Average only non-zero values
-    # # Extract Zero Crossings
-    # zero_crossings = pd.Series(np.mean(librosa.feature.zero_crossing_rate(y)), index=['zero_crossings'])
-    # # Combine all features into a single Series
-    # features = pd.concat([mfccs_mean, spectral_centroids, spectral_rolloff, spectral_flux, spectral_contrast, pitch_mean, zero_crossings])
-    features = smile.process_file(file_path)
-    features_reshaped = features.squeeze()
-    # Ensure it's now a 2D structure suitable for DataFrame
-    print("New shape of features:", features_reshaped.shape)
-    all_data = pd.DataFrame([features_reshaped])
-    return all_data
 def repair_mp3_with_ffmpeg_python(input_path, output_path):
     """Attempt to repair an MP3 file using FFmpeg."""
@@ -277,27 +319,36 @@ async def handle_audio(file: UploadFile = File(...)):
         with open(temp_filename, "wb") as f:
             f.write(contents)
-        preprocess_audio(temp_filename, 'app')
-        repair_mp3_with_ffmpeg_python(temp_filename, temp_filename)
-        # Here you would add the feature extraction logic
-        features = extract_features(temp_filename)
         print("Extracted Features:", features)
-        features = mlp_scaler.transform(features)
-        features = mlp_pca.transform(features)
         # proceed with an inference
-        results = mlp_model.predict(features)
-        decoded_predictions = [mlp_label_encoder.classes_[i] for i in results]
         # # Decode the predictions using the label encoder
-        # decoded_predictions = mlp_label_encoder.inverse_transform(results)
         # .tolist()
         # Clean up the temporary file
         os.remove(temp_filename)
         # Return a successful response with decoded predictions
-        return {"message": "File processed successfully", "prediction": decoded_predictions}
     except Exception as e:
         print(e)
         # Handle possible exceptions

 import opensmile
 import ffmpeg
+import noisereduce as nr
+import numpy as np
+default_sample_rate=22050  # target sample rate that preprocess_audio resamples every signal to
+def load(file_name, skip_seconds=0):
+    return librosa.load(file_name, sr=None, res_type='kaiser_fast')
+def preprocess_audio(audio_data, rate):
+    # Apply preprocessing steps
+    audio_data = nr.reduce_noise(y=audio_data, sr=rate)
+    audio_data = librosa.util.normalize(audio_data)
+    audio_data, _ = librosa.effects.trim(audio_data)
+    audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
+#     audio_data = fix_length(audio_data)
+    rate = default_sample_rate
+    return audio_data, rate
+def extract_features(X, sample_rate):
+    # Generate Mel-frequency cepstral coefficients (MFCCs) from a time series
+    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
+    # Generates a Short-time Fourier transform (STFT) to use in the chroma_stft
+    stft = np.abs(librosa.stft(X))
+    # Computes a chromagram from a waveform or power spectrogram.
+    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
+    # Computes a mel-scaled spectrogram.
+    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T,axis=0)
+    # Computes spectral contrast
+    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
+    # Computes the tonal centroid features (tonnetz)
+    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X),sr=sample_rate).T,axis=0)
+    # Concatenate all feature arrays into a single 1D array
+    combined_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
+    return combined_features
 load_dotenv()
         # If there's an error, return a 500 error with the error's details
         raise HTTPException(status_code=500, detail=str(e))
+# naive bayes
+nb_model = joblib.load('1713630229.4965415_trained_model.joblib')
+nb_pca = joblib.load('app/pca.pkl')
+nb_scaler = joblib.load('app/scaler.pkl')
+nb_label_encoder = joblib.load('app/label_encoder.pkl')
+# def preprocess_audio(path, save_dir):
+#     y, sr = librosa.load(path)
+#     # remove silence
+#     intervals = librosa.effects.split(y, top_db=20)
+#     # Concatenate non-silent intervals
+#     y_no_gaps = np.concatenate([y[start:end] for start, end in intervals])
+#     file_name_without_extension = os.path.basename(path).split('.')[0]
+#     extension = os.path.basename(path).split('.')[1]
+#     y_trimmed, _ = librosa.effects.trim(y_no_gaps, top_db = 20)
+#     D = librosa.stft(y)
+#     S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
+#     S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128*2,)
+#     S_db_mel = librosa.amplitude_to_db(np.abs(S), ref=np.max)
+#     # Apply noise reduction (example using spectral subtraction)
+#     y_denoised = librosa.effects.preemphasis(y_trimmed)
+#     # Apply dynamic range compression
+#     y_compressed = librosa.effects.preemphasis(y_denoised)
+#     # Augmentation (example of time stretching)
+# #     y_stretched = librosa.effects.time_stretch(y_compressed, rate=1.2)
+#     # Silence Removal
+#     y_silence_removed, _ = librosa.effects.trim(y_compressed)
+#     # Equalization (example: apply high-pass filter)
+#     y_equalized = librosa.effects.preemphasis(y_silence_removed)
+#     # Define target sample rate
+#     target_sr = sr
+# #     # Data Augmentation (example: pitch shifting)
+# #     y_pitch_shifted = librosa.effects.pitch_shift(y_normalized, sr=target_sr, n_steps=2)
+#     # Split audio into non-silent intervals
+#     # Normalize the audio signal
+#     y_normalized = librosa.util.normalize(y_equalized)
+#     # Feature Extraction (example: MFCCs)
+# #     mfccs = librosa.feature.mfcc(y=y_normalized, sr=target_sr, n_mfcc=20)
+#     # output_file_path = os.path.join(save_dir, f"{file_name_without_extension}.{extension}")
+#     # Write the audio data to the output file in .wav format
+#     sf.write(path, y_normalized, target_sr)
+#     return 'success'
+# smile = opensmile.Smile(
+#     feature_set=opensmile.FeatureSet.ComParE_2016,
+#     feature_level=opensmile.FeatureLevel.Functionals,
+# )
+# def extract_features(file_path):
+#     # # Load the audio file
+#     # y, sr = librosa.load(file_path, sr=None, dtype=np.float32)
+#     # # Extract MFCCs
+#     # mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
+#     # mfccs_mean = pd.Series(mfccs.mean(axis=1), index=[f'mfcc_{i}' for i in range(mfccs.shape[0])])
+#     # # Extract Spectral Features
+#     # spectral_centroids = pd.Series(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)), index=['spectral_centroid'])
+#     # spectral_rolloff = pd.Series(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)), index=['spectral_rolloff'])
+#     # spectral_flux = pd.Series(np.mean(librosa.onset.onset_strength(y=y, sr=sr)), index=['spectral_flux'])
+#     # spectral_contrast = pd.Series(np.mean(librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr), axis=1), index=[f'spectral_contrast_{i}' for i in range(librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr).shape[0])])
+#     # # Extract Pitch
+#     # pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
+#     # pitch_mean = pd.Series(np.mean(pitches[pitches != 0]), index=['pitch_mean'])  # Average only non-zero values
+#     # # Extract Zero Crossings
+#     # zero_crossings = pd.Series(np.mean(librosa.feature.zero_crossing_rate(y)), index=['zero_crossings'])
+#     # # Combine all features into a single Series
+#     # features = pd.concat([mfccs_mean, spectral_centroids, spectral_rolloff, spectral_flux, spectral_contrast, pitch_mean, zero_crossings])
+#     features = smile.process_file(file_path)
+#     features_reshaped = features.squeeze()
+#     # Ensure it's now a 2D structure suitable for DataFrame
+#     print("New shape of features:", features_reshaped.shape)
+#     all_data = pd.DataFrame([features_reshaped])
+#     return all_data
 def repair_mp3_with_ffmpeg_python(input_path, output_path):
     """Attempt to repair an MP3 file using FFmpeg."""
         with open(temp_filename, "wb") as f:
             f.write(contents)
+        audio_data, sr = load(temp_filename, skip_seconds=5)
+        print("finished loading ", temp_filename)
+        # Preprocess data
+        audio_data, sr = preprocess_audio(audio_data, sr)
+        print("finished processing ", temp_filename)
+        # Extract features
+        features = extract_features(audio_data, sr)
+        # preprocess_audio(temp_filename, 'app')
+        # repair_mp3_with_ffmpeg_python(temp_filename, temp_filename)
+        # # Here you would add the feature extraction logic
+        # features = extract_features(temp_filename)
         print("Extracted Features:", features)
+        # features = nb_scaler.transform(features)
+        # features = nb_pca.transform(features)
+        features = np.array(features).reshape(1, -1)
         # proceed with an inference
+        results = nb_model.predict(features)
+        # decoded_predictions = [nb_label_encoder.classes_[i] for i in results]
         # # Decode the predictions using the label encoder
+        # decoded_predictions = nb_label_encoder.inverse_transform(results)
         # .tolist()
         # Clean up the temporary file
         os.remove(temp_filename)
         # Return a successful response with decoded predictions
+        return {"message": "File processed successfully", "prediction": results}
     except Exception as e:
         print(e)
         # Handle possible exceptions

requirements.txt CHANGED Viewed

@@ -17,4 +17,5 @@ opensmile
 eyeD3
 matplotlib
 python-multipart
-ffmpeg-python

 eyeD3
 matplotlib
 python-multipart
+ffmpeg-python
+noisereduce