Spaces:
Sleeping
Sleeping
deveix
committed on
Commit
·
bbf7597
1
Parent(s):
886c1e1
mlp
Browse files- app/label_encoder.pkl +0 -0
- app/main.py +142 -1
- app/mlp_model.pkl +0 -0
- app/pca.pkl +0 -0
- app/scaler.pkl +0 -0
- requirements.txt +5 -1
app/label_encoder.pkl
ADDED
|
Binary file (2.15 kB). View file
|
|
|
app/main.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
from fastapi import FastAPI,
|
| 2 |
from pydantic import BaseModel
|
| 3 |
import os
|
| 4 |
from pymongo import MongoClient
|
|
@@ -7,6 +7,25 @@ from langchain_community.vectorstores import MongoDBAtlasVectorSearch
|
|
| 7 |
import uvicorn
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
load_dotenv()
|
| 12 |
|
|
@@ -127,5 +146,127 @@ async def get_answer(item: Item, token: str = Depends(verify_token)):
|
|
| 127 |
# If there's an error, return a 500 error with the error's details
|
| 128 |
raise HTTPException(status_code=500, detail=str(e))
|
| 129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
# if __name__ == "__main__":
|
| 131 |
# uvicorn.run("main:app", host="0.0.0.0", port=8080, reload=False)
|
|
|
|
| 1 |
+
from fastapi import FastAPI, File, UploadFile, HTTPException, Depends
|
| 2 |
from pydantic import BaseModel
|
| 3 |
import os
|
| 4 |
from pymongo import MongoClient
|
|
|
|
| 7 |
import uvicorn
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
from fastapi.middleware.cors import CORSMiddleware
|
| 10 |
+
from uuid import uuid4
|
| 11 |
+
|
| 12 |
+
from typing import Optional
|
| 13 |
+
import joblib
|
| 14 |
+
import librosa
|
| 15 |
+
import numpy as np
|
| 16 |
+
|
| 17 |
+
import pandas as pd
|
| 18 |
+
import numpy as np
|
| 19 |
+
import matplotlib.pylab as plt
|
| 20 |
+
import seaborn as sns
|
| 21 |
+
|
| 22 |
+
from glob import glob
|
| 23 |
+
import librosa.display
|
| 24 |
+
import IPython.display as ipd
|
| 25 |
+
import soundfile as sf
|
| 26 |
+
|
| 27 |
+
from itertools import cycle
|
| 28 |
+
|
| 29 |
|
| 30 |
load_dotenv()
|
| 31 |
|
|
|
|
| 146 |
# If there's an error, return a 500 error with the error's details
|
| 147 |
raise HTTPException(status_code=500, detail=str(e))
|
| 148 |
|
| 149 |
+
# MLP audio classifier: load the persisted model, PCA, scaler and label encoder once at import time
|
| 150 |
+
mlp_model = joblib.load('app/mlp_model.pkl')
|
| 151 |
+
mlp_pca = joblib.load('app/pca.pkl')
|
| 152 |
+
mlp_scaler = joblib.load('app/scaler.pkl')
|
| 153 |
+
mlp_label_encoder = joblib.load('app/label_encoder.pkl')
|
| 154 |
+
|
| 155 |
+
def preprocess_audio(path, save_dir):
    """Clean up an audio file and save the result as a WAV file in ``save_dir``.

    Pipeline (order matters and is kept as-is, since the downstream model was
    presumably trained on audio processed this way -- TODO confirm):
    load -> drop silent gaps -> trim edges -> pre-emphasis twice ->
    trim edges again -> pre-emphasis -> peak-normalize -> write WAV.

    Args:
        path: Path of the audio file to load with ``librosa.load`` (librosa's
            default sample rate is used).
        save_dir: Directory that receives ``<stem>.wav``, where ``<stem>`` is
            the input's base name up to its first dot. Created if missing.

    Returns:
        The string ``'success'``.

    Raises:
        ValueError: If the recording contains no non-silent audio.
    """
    y, sr = librosa.load(path)

    # Keep only the segments within 20 dB of the peak level.
    intervals = librosa.effects.split(y, top_db=20)
    if len(intervals) == 0:
        # np.concatenate([]) would fail with an opaque message; be explicit.
        raise ValueError(f"No non-silent audio found in {path!r}")
    y_no_gaps = np.concatenate([y[start:end] for start, end in intervals])

    # Trim any remaining leading/trailing silence.
    y_trimmed, _ = librosa.effects.trim(y_no_gaps, top_db=20)

    # Two pre-emphasis (high-frequency boost) passes. The original comments
    # called these "noise reduction" and "dynamic range compression", but the
    # code only ever applied pre-emphasis.
    y_emphasized = librosa.effects.preemphasis(y_trimmed)
    y_emphasized = librosa.effects.preemphasis(y_emphasized)

    # Trim again with librosa's default threshold, then a third pre-emphasis.
    y_silence_removed, _ = librosa.effects.trim(y_emphasized)
    y_equalized = librosa.effects.preemphasis(y_silence_removed)

    # Normalize the audio signal.
    y_normalized = librosa.util.normalize(y_equalized)

    # Callers derive the output path from the input name, so the
    # "up to the first dot" stem rule must not change.
    file_name_without_extension = os.path.basename(path).split('.')[0]
    os.makedirs(save_dir, exist_ok=True)
    output_file_path = os.path.join(save_dir, f"{file_name_without_extension}.wav")

    # Write the audio data to the output file in .wav format, keeping the
    # sample rate the audio was loaded at.
    sf.write(output_file_path, y_normalized, sr)

    return 'success'
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
def extract_features(file_path):
    """Compute a per-file summary feature vector for one audio file.

    Args:
        file_path: Path of the audio file; loaded at its native sample rate
            (``sr=None``) as float32.

    Returns:
        A ``pandas.Series`` indexed by feature name, in this fixed order:
        ``mfcc_0``..``mfcc_19``, ``spectral_centroid``, ``spectral_rolloff``,
        ``spectral_flux``, ``spectral_contrast_<i>``, ``pitch_mean``,
        ``zero_crossings``. The order is significant for any model fitted on
        these features.
    """
    # Load the audio file
    y, sr = librosa.load(file_path, sr=None, dtype=np.float32)

    # MFCCs: mean of each of the 20 coefficients over time.
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    mfccs_mean = pd.Series(mfccs.mean(axis=1), index=[f'mfcc_{i}' for i in range(mfccs.shape[0])])

    # Spectral features, each averaged over all frames.
    spectral_centroids = pd.Series(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)), index=['spectral_centroid'])
    spectral_rolloff = pd.Series(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)), index=['spectral_rolloff'])
    # Named "spectral_flux" but computed as the mean onset strength.
    spectral_flux = pd.Series(np.mean(librosa.onset.onset_strength(y=y, sr=sr)), index=['spectral_flux'])

    # Compute the contrast matrix once; it was previously recomputed
    # (including a second STFT) only to read its band count.
    contrast = librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr)
    spectral_contrast = pd.Series(
        np.mean(contrast, axis=1),
        index=[f'spectral_contrast_{i}' for i in range(contrast.shape[0])],
    )

    # Pitch: average only the non-zero entries of the pitch track.
    pitches, _ = librosa.piptrack(y=y, sr=sr)
    voiced = pitches[pitches != 0]
    # np.mean of an empty selection is NaN (plus a RuntimeWarning).
    # NOTE(review): 0.0 is used as the "no pitch detected" value -- confirm it
    # matches how such files were handled when the model was trained.
    pitch_value = np.mean(voiced) if voiced.size else 0.0
    pitch_mean = pd.Series(pitch_value, index=['pitch_mean'])

    # Named "zero_crossings" but holds the mean zero-crossing rate.
    zero_crossings = pd.Series(np.mean(librosa.feature.zero_crossing_rate(y)), index=['zero_crossings'])

    # Combine all features into a single Series
    features = pd.concat([mfccs_mean, spectral_centroids, spectral_rolloff, spectral_flux, spectral_contrast, pitch_mean, zero_crossings])
    return features
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
@app.post("/mlp")
async def handle_audio(file: UploadFile = File(...)):
    """Classify an uploaded audio clip with the pre-trained MLP pipeline.

    The upload is written to a private temporary directory, cleaned with
    ``preprocess_audio``, turned into a feature vector with
    ``extract_features``, then passed through the loaded scaler, PCA and MLP.

    Args:
        file: Uploaded MP3 or WAV file.

    Returns:
        ``{"message": ..., "prediction": [...]}`` where ``prediction`` is the
        raw output of ``mlp_model.predict`` as a list.

    Raises:
        HTTPException: 400 for an unsupported content type; 500 with the
            error text for any failure during processing.
    """
    import tempfile  # local import so this block needs no change to the file's import section

    # Validate before the try block so the 400 is not re-wrapped as a 500.
    suffix_by_type = {
        "audio/mpeg": ".mp3",
        "audio/mp3": ".mp3",
        "audio/wav": ".wav",
        "audio/x-wav": ".wav",
        "audio/wave": ".wav",
    }
    suffix = suffix_by_type.get(file.content_type)
    if suffix is None:
        raise HTTPException(status_code=400, detail="Invalid file type. Please upload an MP3 or WAV file.")

    try:
        # Read the file's content
        contents = await file.read()

        # Everything written below is removed when the context exits,
        # whether processing succeeds or fails.
        with tempfile.TemporaryDirectory() as work_dir:
            upload_path = os.path.join(work_dir, f"{uuid4().hex}{suffix}")
            with open(upload_path, "wb") as f:
                f.write(contents)

            # Separate output directory so a .wav upload is never overwritten
            # by its own preprocessed version.
            processed_dir = os.path.join(work_dir, "processed")
            os.makedirs(processed_dir)
            preprocess_audio(upload_path, processed_dir)

            # preprocess_audio writes "<stem>.wav" where <stem> is the base
            # name up to its first dot.
            stem = os.path.basename(upload_path).split('.')[0]
            processed_path = os.path.join(processed_dir, f"{stem}.wav")

            # NOTE(review): features are taken from the preprocessed audio;
            # previously the preprocessing result was written and then ignored.
            # Confirm this matches the training pipeline.
            features = extract_features(processed_path)

        # scikit-learn expects a 2-D (n_samples, n_features) array.
        sample = features.to_numpy().reshape(1, -1)

        # Apply the already-fitted transformers. fit_transform would refit
        # them on this single sample and discard the training statistics.
        sample = mlp_scaler.transform(sample)
        sample = mlp_pca.transform(sample)

        results = mlp_model.predict(sample)

        return {"message": "File processed successfully", "prediction": results.tolist()}
    except Exception as e:
        # Same convention as the other endpoint: surface the error text as a 500.
        raise HTTPException(status_code=500, detail=str(e))
|
| 270 |
+
|
| 271 |
# if __name__ == "__main__":
|
| 272 |
# uvicorn.run("main:app", host="0.0.0.0", port=8080, reload=False)
|
app/mlp_model.pkl
ADDED
|
Binary file (210 kB). View file
|
|
|
app/pca.pkl
ADDED
|
Binary file (34.5 kB). View file
|
|
|
app/scaler.pkl
ADDED
|
Binary file (378 kB). View file
|
|
|
requirements.txt
CHANGED
|
@@ -10,4 +10,8 @@ pymongo>=3.11
|
|
| 10 |
tiktoken==0.6.0
|
| 11 |
langchain-openai==0.0.8
|
| 12 |
python-dotenv
|
| 13 |
-
upstash-redis
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
tiktoken==0.6.0
|
| 11 |
langchain-openai==0.0.8
|
| 12 |
python-dotenv
|
| 13 |
+
upstash-redis
|
| 14 |
+
librosa
|
| 15 |
+
soundfile
|
| 16 |
+
opensmile
|
| 17 |
+
eyeD3
|