import opensmile
import ffmpeg
# If there's an error, return a 500 error with the error's details
raise HTTPException(status_code=500, detail=str(e))
def preprocess_audio(path, save_dir):
# y_stretched = librosa.effects.time_stretch(y_compressed, rate=1.2)
# # Data Augmentation (example: pitch shifting)
# y_pitch_shifted = librosa.effects.pitch_shift(y_normalized, sr=target_sr, n_steps=2)
# mfccs = librosa.feature.mfcc(y=y_normalized, sr=target_sr, n_mfcc=20)
smile = opensmile.Smile(
def extract_features(file_path):
def repair_mp3_with_ffmpeg_python(input_path, output_path):
"""Attempt to repair an MP3 file using FFmpeg."""
with open(temp_filename, "wb") as f:
print("Extracted Features:", features)
features =
features =
# proceed with an inference
results =
decoded_predictions = [
# # Decode the predictions using the label encoder
# decoded_predictions =
# .tolist()
# Clean up the temporary file
# Return a successful response with decoded predictions
return {"message": "File processed successfully", "prediction":
except Exception as e:
# Handle possible exceptions
import opensmile
import ffmpeg
import noisereduce as nr
import numpy as np
def load(file_name, skip_seconds=0):
    """Read an audio file through ``librosa.load`` without resampling.

    Args:
        file_name: Path of the audio file; forwarded to ``librosa.load``.
        skip_seconds: Accepted for the benefit of callers but not used by
            this function.
            NOTE(review): presumably intended to skip leading audio --
            confirm the intent before wiring it up.

    Returns:
        The value of ``librosa.load``; callers in this file unpack it as
        ``(audio_data, sample_rate)``.
    """
    # sr=None asks librosa to keep the sampling rate found in the file.
    load_options = {"sr": None, "res_type": "kaiser_fast"}
    return librosa.load(file_name, **load_options)
def preprocess_audio(audio_data, rate):
    """Clean up a raw signal and bring it to the shared sample rate.

    The stages run in a fixed order: noise reduction, normalization,
    trimming, then resampling to ``default_sample_rate``.

    Args:
        audio_data: Audio samples as produced by ``load``.
        rate: Sampling rate of ``audio_data``.

    Returns:
        Tuple ``(processed_audio, default_sample_rate)``.
    """
    denoised = nr.reduce_noise(y=audio_data, sr=rate)
    normalized = librosa.util.normalize(denoised)
    # librosa.effects.trim also returns the kept interval, which is unused.
    trimmed, _ = librosa.effects.trim(normalized)
    resampled = librosa.resample(
        trimmed,
        orig_sr=rate,
        target_sr=default_sample_rate,
    )
    # Length fixing (fix_length) is intentionally not applied here.
    return resampled, default_sample_rate
def extract_features(X, sample_rate):
    """Build one flat feature vector describing an audio signal.

    Five librosa descriptors are computed (40 MFCCs, chroma, mel
    spectrogram, spectral contrast and tonnetz). Each is averaged with
    ``np.mean(feature.T, axis=0)`` and the results are stacked in that order.

    Args:
        X: Audio time series.
        sample_rate: Sampling rate of ``X``.

    Returns:
        The array produced by ``np.hstack`` over the five averaged features.
    """

    def _averaged(feature_matrix):
        # Collapse the transposed matrix along its first axis.
        return np.mean(feature_matrix.T, axis=0)

    # The STFT magnitude is shared by the chroma and spectral-contrast features.
    magnitude = np.abs(librosa.stft(X))
    # Tonnetz is computed on the harmonic component of the signal only.
    harmonic_part = librosa.effects.harmonic(X)

    per_feature_means = [
        _averaged(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40)),
        _averaged(librosa.feature.chroma_stft(S=magnitude, sr=sample_rate)),
        _averaged(librosa.feature.melspectrogram(y=X, sr=sample_rate)),
        _averaged(librosa.feature.spectral_contrast(S=magnitude, sr=sample_rate)),
        _averaged(librosa.feature.tonnetz(y=harmonic_part, sr=sample_rate)),
    ]
    return np.hstack(per_feature_means)
# If there's an error, return a 500 error with the error's details
raise HTTPException(status_code=500, detail=str(e))
# Naive Bayes classifier and its companion artifacts, loaded from disk with joblib.
# NOTE(review): joblib.load unpickles the file contents, so these files must
# come from a trusted source.
# NOTE(review): the model path has no 'app/' prefix, unlike the other three
# artifacts -- confirm it resolves from the service's working directory.
nb_model = joblib.load('1713630229.4965415_trained_model.joblib')
# The PCA, scaler and label encoder below are only referenced from
# commented-out lines in the inference code visible in this file.
nb_pca = joblib.load('app/pca.pkl')
nb_scaler = joblib.load('app/scaler.pkl')
nb_label_encoder = joblib.load('app/label_encoder.pkl')
# def preprocess_audio(path, save_dir):
# y, sr = librosa.load(path)
# # remove silence
# intervals = librosa.effects.split(y, top_db=20)
# # Concatenate non-silent intervals
# y_no_gaps = np.concatenate([y[start:end] for start, end in intervals])
# file_name_without_extension = os.path.basename(path).split('.')[0]
# extension = os.path.basename(path).split('.')[1]
# y_trimmed, _ = librosa.effects.trim(y_no_gaps, top_db = 20)
# D = librosa.stft(y)
# S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
# S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128*2,)
# S_db_mel = librosa.amplitude_to_db(np.abs(S), ref=np.max)
# # Apply noise reduction (example using spectral subtraction)
# y_denoised = librosa.effects.preemphasis(y_trimmed)
# # Apply dynamic range compression
# y_compressed = librosa.effects.preemphasis(y_denoised)
# # Augmentation (example of time stretching)
# # y_stretched = librosa.effects.time_stretch(y_compressed, rate=1.2)
# # Silence Removal
# y_silence_removed, _ = librosa.effects.trim(y_compressed)
# # Equalization (example: apply high-pass filter)
# y_equalized = librosa.effects.preemphasis(y_silence_removed)
# # Define target sample rate
# target_sr = sr
# # # Data Augmentation (example: pitch shifting)
# # y_pitch_shifted = librosa.effects.pitch_shift(y_normalized, sr=target_sr, n_steps=2)
#
#
# # Normalize the audio signal
# y_normalized = librosa.util.normalize(y_equalized)
# # Feature Extraction (example: MFCCs)
# # mfccs = librosa.feature.mfcc(y=y_normalized, sr=target_sr, n_mfcc=20)
# # output_file_path = os.path.join(save_dir, f"{file_name_without_extension}.{extension}")
# # Write the audio data to the output file in .wav format
# sf.write(path, y_normalized, target_sr)
# return 'success'
# smile = opensmile.Smile(
# feature_set=opensmile.FeatureSet.ComParE_2016,
# feature_level=opensmile.FeatureLevel.Functionals,
# )
# def extract_features(file_path):
# # # Load the audio file
# # y, sr = librosa.load(file_path, sr=None, dtype=np.float32)
# # # Extract MFCCs
# # mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
# # mfccs_mean = pd.Series(mfccs.mean(axis=1), index=[f'mfcc_{i}' for i in range(mfccs.shape[0])])
# # # Extract Spectral Features
# # spectral_centroids = pd.Series(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)), index=['spectral_centroid'])
# # spectral_rolloff = pd.Series(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)), index=['spectral_rolloff'])
# # spectral_flux = pd.Series(np.mean(librosa.onset.onset_strength(y=y, sr=sr)), index=['spectral_flux'])
# # spectral_contrast = pd.Series(np.mean(librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr), axis=1), index=[f'spectral_contrast_{i}' for i in range(librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr).shape[0])])
# # # Extract Pitch
# # pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
# # pitch_mean = pd.Series(np.mean(pitches[pitches != 0]), index=['pitch_mean']) # Average only non-zero values
# # # Extract Zero Crossings
# # zero_crossings = pd.Series(np.mean(librosa.feature.zero_crossing_rate(y)), index=['zero_crossings'])
# # # Combine all features into a single Series
# # features = pd.concat([mfccs_mean, spectral_centroids, spectral_rolloff, spectral_flux, spectral_contrast, pitch_mean, zero_crossings])
# features = smile.process_file(file_path)
# features_reshaped = features.squeeze()
# # Ensure it's now a 2D structure suitable for DataFrame
# print("New shape of features:", features_reshaped.shape)
# all_data = pd.DataFrame([features_reshaped])
# return all_data
def repair_mp3_with_ffmpeg_python(input_path, output_path):
"""Attempt to repair an MP3 file using FFmpeg."""
with open(temp_filename, "wb") as f:
audio_data, sr = load(temp_filename, skip_seconds=5)
print("finished loading ", temp_filename)
# Preprocess data
audio_data, sr = preprocess_audio(audio_data, sr)
print("finished processing ", temp_filename)
# Extract features
features = extract_features(audio_data, sr)
# preprocess_audio(temp_filename, 'app')
# repair_mp3_with_ffmpeg_python(temp_filename, temp_filename)
# # Here you would add the feature extraction logic
# features = extract_features(temp_filename)
print("Extracted Features:", features)
# features = nb_scaler.transform(features)
# features = nb_pca.transform(features)
features = np.array(features).reshape(1, -1)
# proceed with an inference
results = nb_model.predict(features)
# decoded_predictions = [nb_label_encoder.classes_[i] for i in results]
# # Decode the predictions using the label encoder
# decoded_predictions = nb_label_encoder.inverse_transform(results)
# .tolist()
# Clean up the temporary file
# Return a successful response with the raw model predictions (label decoding above is commented out)
return {"message": "File processed successfully", "prediction": results}
except Exception as e:
# Handle possible exceptions
