Commit 5da4449 · deveix committed
Parent(s): a1b9bc0

fix requirements

Files changed:
- app/main.py +2 -89
- requirements.txt +2 -2
app/main.py CHANGED

@@ -340,88 +340,8 @@ def preprocess_audio(audio_data, rate):
     audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
     rate = default_sample_rate
 
-    # y_trimmed, _ = librosa.effects.trim(y_no_gaps, top_db = 20)
-    # D = librosa.stft(y)
-    # S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
-    # S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128*2,)
-    # S_db_mel = librosa.amplitude_to_db(np.abs(S), ref=np.max)
-
-    # Apply noise reduction (example using spectral subtraction)
-    # y_denoised = librosa.effects.preemphasis(y_trimmed)
-
-    # # Apply dynamic range compression
-    # y_compressed = librosa.effects.preemphasis(y_denoised)
-
-    # # Augmentation (example of time stretching)
-    # # y_stretched = librosa.effects.time_stretch(y_compressed, rate=1.2)
-
-    # # Silence Removal
-    # y_silence_removed, _ = librosa.effects.trim(y_compressed)
-
-    # # Equalization (example: apply high-pass filter)
-    # y_equalized = librosa.effects.preemphasis(y_silence_removed)
-
-    # # Define target sample rate
-    # target_sr = sr
-
-    # # Data Augmentation (example: pitch shifting)
-    # y_pitch_shifted = librosa.effects.pitch_shift(y_normalized, sr=target_sr, n_steps=2)
-
-
-    # Split audio into non-silent intervals
-
-
-    # Normalize the audio signal
-    # y_normalized = librosa.util.normalize(y_equalized)
-
-    # Feature Extraction (example: MFCCs)
-    # mfccs = librosa.feature.mfcc(y=y_normalized, sr=target_sr, n_mfcc=20)
-
-    # output_file_path = os.path.join(save_dir, f"{file_name_without_extension}.{extension}")
-
-    # Write the audio data to the output file in .wav format
-    # sf.write(path, y_normalized, target_sr)
-
     return audio_data, rate
 
-# smile = opensmile.Smile(
-#     feature_set=opensmile.FeatureSet.ComParE_2016,
-#     feature_level=opensmile.FeatureLevel.Functionals,
-# )
-
-# def extract_features(file_path):
-#     # # Load the audio file
-#     # y, sr = librosa.load(file_path, sr=None, dtype=np.float32)
-
-#     # # Extract MFCCs
-#     # mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
-#     # mfccs_mean = pd.Series(mfccs.mean(axis=1), index=[f'mfcc_{i}' for i in range(mfccs.shape[0])])
-
-#     # # Extract Spectral Features
-#     # spectral_centroids = pd.Series(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)), index=['spectral_centroid'])
-#     # spectral_rolloff = pd.Series(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)), index=['spectral_rolloff'])
-#     # spectral_flux = pd.Series(np.mean(librosa.onset.onset_strength(y=y, sr=sr)), index=['spectral_flux'])
-#     # spectral_contrast = pd.Series(np.mean(librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr), axis=1), index=[f'spectral_contrast_{i}' for i in range(librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr).shape[0])])
-
-#     # # Extract Pitch
-#     # pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
-#     # pitch_mean = pd.Series(np.mean(pitches[pitches != 0]), index=['pitch_mean'])  # Average only non-zero values
-
-#     # # Extract Zero Crossings
-#     # zero_crossings = pd.Series(np.mean(librosa.feature.zero_crossing_rate(y)), index=['zero_crossings'])
-
-#     # # Combine all features into a single Series
-#     # features = pd.concat([mfccs_mean, spectral_centroids, spectral_rolloff, spectral_flux, spectral_contrast, pitch_mean, zero_crossings])
-
-#     features = smile.process_file(file_path)
-#     features_reshaped = features.squeeze()
-
-#     # Ensure it's now a 2D structure suitable for DataFrame
-#     print("New shape of features:", features_reshaped.shape)
-
-#     all_data = pd.DataFrame([features_reshaped])
-#     return all_data
-
 def repair_mp3_with_ffmpeg_python(input_path, output_path):
     """Attempt to repair an MP3 file using FFmpeg."""
     try:

@@ -469,13 +389,6 @@ async def handle_audio(file: UploadFile = File(...)):
     # Extract features
     features = extract_features(audio_data, sr)
 
-    # preprocess_audio(temp_filename, 'app')
-    # repair_mp3_with_ffmpeg_python(temp_filename, temp_filename)
-    # # Here you would add the feature extraction logic
-    # features = extract_features(temp_filename)
-    # print("Extracted Features:", features)
-    # features = pca.transform(features)
-    # features = np.array(features).reshape(1, -1)
     features = features.reshape(1, -1)
 
     features = scaler.transform(features)

@@ -484,10 +397,10 @@ async def handle_audio(file: UploadFile = File(...)):
     results = model.predict(features)
     # decoded_predictions = [label_encoder.classes_[i] for i in results]
 
-    #
+    # Decode the predictions using the label encoder
    decoded_predictions = label_encoder.inverse_transform(results)
     print('decoded', decoded_predictions[0])
-    
+
     # Clean up the temporary file
     os.remove(temp_filename)
     print({"message": "File processed successfully", "sheikh": decoded_predictions[0]})
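After this commit, the preprocessing and inference path in app/main.py is much shorter. The sketch below is only an orientation aid reconstructed from the diff context, not the file itself: DEFAULT_SAMPLE_RATE and the predict_label wrapper are hypothetical, and extract_features, scaler, model, and label_encoder stand in for objects defined elsewhere in app/main.py that this diff does not show.

import librosa
import numpy as np

# Hypothetical constant; the real default_sample_rate is defined elsewhere in app/main.py.
DEFAULT_SAMPLE_RATE = 16_000

def preprocess_audio(audio_data, rate):
    # Resample to the app-wide default rate, as the retained lines of preprocess_audio do.
    audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=DEFAULT_SAMPLE_RATE)
    return audio_data, DEFAULT_SAMPLE_RATE

def predict_label(audio_data, sr, extract_features, scaler, model, label_encoder):
    # Same chain as handle_audio() after this commit:
    # extract features -> flatten to one row -> scale -> predict -> decode label.
    features = extract_features(audio_data, sr)
    features = np.asarray(features).reshape(1, -1)
    features = scaler.transform(features)
    results = model.predict(features)
    return label_encoder.inverse_transform(results)[0]

In app/main.py these steps run inline inside handle_audio(); the wrapper only makes the order of operations explicit.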
requirements.txt CHANGED

@@ -9,6 +9,7 @@ pypdf==4.0.2
 pymongo>=3.11
 tiktoken==0.6.0
 langchain-openai==0.0.8
+tensorflow
 python-dotenv
 upstash-redis
 librosa

@@ -19,5 +20,4 @@ matplotlib
 python-multipart
 ffmpeg-python
 noisereduce
-scikit-learn==1.2.2
-tensorflow
+scikit-learn==1.2.2
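One plausible reason for keeping scikit-learn pinned at 1.2.2 (an assumption; the commit message only says "fix requirements") is that objects like scaler and label_encoder are commonly persisted with pickle or joblib, and scikit-learn warns when such artifacts are loaded under a different version than the one that saved them. A minimal sanity check, with hypothetical artifact filenames:

import sklearn
import joblib  # installed alongside scikit-learn; assumed here to be how the artifacts were saved

print("scikit-learn:", sklearn.__version__)  # expect 1.2.2, matching requirements.txt

# Hypothetical paths; the Space's real artifact filenames are not part of this diff.
scaler = joblib.load("scaler.joblib")
label_encoder = joblib.load("label_encoder.joblib")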