Spaces:

ahmedkasem
/

quran-nlp

Sleeping

App Files Files Community

deveix committed on Apr 22, 2024

Commit

5da4449

1 Parent(s): a1b9bc0

fix requirements

Browse files

Files changed (2) hide show

app/main.py +2 -89
requirements.txt +2 -2

app/main.py CHANGED Viewed

@@ -340,88 +340,8 @@ def preprocess_audio(audio_data, rate):
     audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
     rate = default_sample_rate
-    # y_trimmed, _ = librosa.effects.trim(y_no_gaps, top_db = 20)
-    # D = librosa.stft(y)
-    # S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
-    # S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128*2,)
-    # S_db_mel = librosa.amplitude_to_db(np.abs(S), ref=np.max)
-    # Apply noise reduction (example using spectral subtraction)
-#     y_denoised = librosa.effects.preemphasis(y_trimmed)
-#     # Apply dynamic range compression
-#     y_compressed = librosa.effects.preemphasis(y_denoised)
-#     # Augmentation (example of time stretching)
-# #     y_stretched = librosa.effects.time_stretch(y_compressed, rate=1.2)
-#     # Silence Removal
-#     y_silence_removed, _ = librosa.effects.trim(y_compressed)
-#     # Equalization (example: apply high-pass filter)
-#     y_equalized = librosa.effects.preemphasis(y_silence_removed)
-#     # Define target sample rate
-#     target_sr = sr
-#     # Data Augmentation (example: pitch shifting)
-#     y_pitch_shifted = librosa.effects.pitch_shift(y_normalized, sr=target_sr, n_steps=2)
-    # Split audio into non-silent intervals
-    # Normalize the audio signal
-    # y_normalized = librosa.util.normalize(y_equalized)
-    # Feature Extraction (example: MFCCs)
-#     mfccs = librosa.feature.mfcc(y=y_normalized, sr=target_sr, n_mfcc=20)
-    # output_file_path = os.path.join(save_dir, f"{file_name_without_extension}.{extension}")
-    # Write the audio data to the output file in .wav format
-    # sf.write(path, y_normalized, target_sr)
     return audio_data, rate
-# smile = opensmile.Smile(
-#     feature_set=opensmile.FeatureSet.ComParE_2016,
-#     feature_level=opensmile.FeatureLevel.Functionals,
-# )
-# def extract_features(file_path):
-#     # # Load the audio file
-#     # y, sr = librosa.load(file_path, sr=None, dtype=np.float32)
-#     # # Extract MFCCs
-#     # mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
-#     # mfccs_mean = pd.Series(mfccs.mean(axis=1), index=[f'mfcc_{i}' for i in range(mfccs.shape[0])])
-#     # # Extract Spectral Features
-#     # spectral_centroids = pd.Series(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)), index=['spectral_centroid'])
-#     # spectral_rolloff = pd.Series(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)), index=['spectral_rolloff'])
-#     # spectral_flux = pd.Series(np.mean(librosa.onset.onset_strength(y=y, sr=sr)), index=['spectral_flux'])
-#     # spectral_contrast = pd.Series(np.mean(librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr), axis=1), index=[f'spectral_contrast_{i}' for i in range(librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr).shape[0])])
-#     # # Extract Pitch
-#     # pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
-#     # pitch_mean = pd.Series(np.mean(pitches[pitches != 0]), index=['pitch_mean'])  # Average only non-zero values
-#     # # Extract Zero Crossings
-#     # zero_crossings = pd.Series(np.mean(librosa.feature.zero_crossing_rate(y)), index=['zero_crossings'])
-#     # # Combine all features into a single Series
-#     # features = pd.concat([mfccs_mean, spectral_centroids, spectral_rolloff, spectral_flux, spectral_contrast, pitch_mean, zero_crossings])
-#     features = smile.process_file(file_path)
-#     features_reshaped = features.squeeze()
-#     # Ensure it's now a 2D structure suitable for DataFrame
-#     print("New shape of features:", features_reshaped.shape)
-#     all_data = pd.DataFrame([features_reshaped])
-#     return all_data
 def repair_mp3_with_ffmpeg_python(input_path, output_path):
     """Attempt to repair an MP3 file using FFmpeg."""
     try:
@@ -469,13 +389,6 @@ async def handle_audio(file: UploadFile = File(...)):
         # Extract features
         features = extract_features(audio_data, sr)
-        # preprocess_audio(temp_filename, 'app')
-        # repair_mp3_with_ffmpeg_python(temp_filename, temp_filename)
-        # # Here you would add the feature extraction logic
-        # features = extract_features(temp_filename)
-        # print("Extracted Features:", features)
-        # features = pca.transform(features)
-        # features = np.array(features).reshape(1, -1)
         features = features.reshape(1, -1)
         features = scaler.transform(features)
@@ -484,10 +397,10 @@ async def handle_audio(file: UploadFile = File(...)):
         results = model.predict(features)
         # decoded_predictions = [label_encoder.classes_[i] for i in results]
-        # # Decode the predictions using the label encoder
         decoded_predictions = label_encoder.inverse_transform(results)
         print('decoded', decoded_predictions[0])
-        # .tolist()
         # Clean up the temporary file
         os.remove(temp_filename)
         print({"message": "File processed successfully", "sheikh": decoded_predictions[0]})

     audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
     rate = default_sample_rate
     return audio_data, rate
 def repair_mp3_with_ffmpeg_python(input_path, output_path):
     """Attempt to repair an MP3 file using FFmpeg."""
     try:
         # Extract features
         features = extract_features(audio_data, sr)
         features = features.reshape(1, -1)
         features = scaler.transform(features)
         results = model.predict(features)
         # decoded_predictions = [label_encoder.classes_[i] for i in results]
+        # Decode the predictions using the label encoder
         decoded_predictions = label_encoder.inverse_transform(results)
         print('decoded', decoded_predictions[0])
         # Clean up the temporary file
         os.remove(temp_filename)
         print({"message": "File processed successfully", "sheikh": decoded_predictions[0]})

requirements.txt CHANGED Viewed

@@ -9,6 +9,7 @@ pypdf==4.0.2
 pymongo>=3.11
 tiktoken==0.6.0
 langchain-openai==0.0.8
 python-dotenv
 upstash-redis
 librosa
@@ -19,5 +20,4 @@ matplotlib
 python-multipart
 ffmpeg-python
 noisereduce
-scikit-learn==1.2.2
-tensorflow

 pymongo>=3.11
 tiktoken==0.6.0
 langchain-openai==0.0.8
+tensorflow
 python-dotenv
 upstash-redis
 librosa
 python-multipart
 ffmpeg-python
 noisereduce
+scikit-learn==1.2.2