Spaces:
Running
Running
deveix
committed on
Commit
·
bbf7597
1
Parent(s):
886c1e1
mlp
Browse files- app/label_encoder.pkl +0 -0
- app/main.py +142 -1
- app/mlp_model.pkl +0 -0
- app/pca.pkl +0 -0
- app/scaler.pkl +0 -0
- requirements.txt +5 -1
app/label_encoder.pkl
ADDED
Binary file (2.15 kB). View file
|
|
app/main.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from fastapi import FastAPI,
|
2 |
from pydantic import BaseModel
|
3 |
import os
|
4 |
from pymongo import MongoClient
|
@@ -7,6 +7,25 @@ from langchain_community.vectorstores import MongoDBAtlasVectorSearch
|
|
7 |
import uvicorn
|
8 |
from dotenv import load_dotenv
|
9 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
load_dotenv()
|
12 |
|
@@ -127,5 +146,127 @@ async def get_answer(item: Item, token: str = Depends(verify_token)):
|
|
127 |
# If there's an error, return a 500 error with the error's details
|
128 |
raise HTTPException(status_code=500, detail=str(e))
|
129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
# if __name__ == "__main__":
|
131 |
# uvicorn.run("main:app", host="0.0.0.0", port=8080, reload=False)
|
|
|
1 |
+
from fastapi import FastAPI, File, UploadFile, HTTPException, Depends
|
2 |
from pydantic import BaseModel
|
3 |
import os
|
4 |
from pymongo import MongoClient
|
|
|
7 |
import uvicorn
|
8 |
from dotenv import load_dotenv
|
9 |
from fastapi.middleware.cors import CORSMiddleware
|
10 |
+
from uuid import uuid4
|
11 |
+
|
12 |
+
from typing import Optional
|
13 |
+
import joblib
|
14 |
+
import librosa
|
15 |
+
import numpy as np
|
16 |
+
|
17 |
+
import pandas as pd
|
18 |
+
import numpy as np
|
19 |
+
import matplotlib.pylab as plt
|
20 |
+
import seaborn as sns
|
21 |
+
|
22 |
+
from glob import glob
|
23 |
+
import librosa.display
|
24 |
+
import IPython.display as ipd
|
25 |
+
import soundfile as sf
|
26 |
+
|
27 |
+
from itertools import cycle
|
28 |
+
|
29 |
|
30 |
load_dotenv()
|
31 |
|
|
|
146 |
# If there's an error, return a 500 error with the error's details
|
147 |
raise HTTPException(status_code=500, detail=str(e))
|
148 |
|
149 |
+
# MLP inference artifacts, loaded once at import time with joblib:
# the classifier itself, the PCA and scaler objects applied to the features
# before prediction, and the label encoder.
# NOTE: joblib.load unpickles these files, so they must only ever come from a
# trusted source (here: files shipped under app/).
|
150 |
+
mlp_model = joblib.load('app/mlp_model.pkl')
|
151 |
+
mlp_pca = joblib.load('app/pca.pkl')
|
152 |
+
mlp_scaler = joblib.load('app/scaler.pkl')
|
153 |
+
mlp_label_encoder = joblib.load('app/label_encoder.pkl')
|
154 |
+
|
155 |
+
def preprocess_audio(path, save_dir):
    """Clean up an audio file and save the result as a WAV file in *save_dir*.

    Steps, in order: load the file with librosa's default settings, drop
    silent stretches (20 dB threshold), trim leading/trailing silence, apply
    pre-emphasis twice, trim again with librosa's default threshold, apply
    pre-emphasis once more, peak-normalise, and write the signal to
    ``<save_dir>/<name>.wav`` where ``<name>`` is the input file name up to
    its first dot.

    Args:
        path: Path of the audio file to read.
        save_dir: Directory the WAV file is written to.

    Returns:
        The string ``'success'``.

    Raises:
        ValueError: If no non-silent audio is found in the input.
    """
    y, sr = librosa.load(path)

    # Remove silence: keep only the intervals louder than the threshold.
    intervals = librosa.effects.split(y, top_db=20)
    if len(intervals) == 0:
        # np.concatenate would fail on an empty list with an opaque message.
        raise ValueError(f"No non-silent audio found in {path!r}")
    y_no_gaps = np.concatenate([y[start:end] for start, end in intervals])

    # Output name keeps the original convention: everything before the first dot.
    file_name_without_extension = os.path.basename(path).split('.')[0]

    y_trimmed, _ = librosa.effects.trim(y_no_gaps, top_db=20)

    # The three filtering stages below are all pre-emphasis (a high-frequency
    # boost); the chain is kept exactly as-is so the output matches what this
    # function has always produced.
    y_emphasized = librosa.effects.preemphasis(y_trimmed)
    y_emphasized = librosa.effects.preemphasis(y_emphasized)

    # Second trim, this time with librosa's default threshold.
    y_silence_removed, _ = librosa.effects.trim(y_emphasized)

    y_equalized = librosa.effects.preemphasis(y_silence_removed)

    # Normalize the audio signal
    y_normalized = librosa.util.normalize(y_equalized)

    output_file_path = os.path.join(save_dir, f"{file_name_without_extension}.wav")

    # Write the audio data to the output file in .wav format, at the sample
    # rate the file was loaded with.
    sf.write(output_file_path, y_normalized, sr)

    return 'success'
|
208 |
+
|
209 |
+
|
210 |
+
def extract_features(file_path):
    """Summarise an audio file as a single labelled feature vector.

    Args:
        file_path: Path of the audio file to analyse. It is loaded with
            ``sr=None`` (no resampling requested) as float32.

    Returns:
        A ``pandas.Series`` whose index is, in order: ``mfcc_0`` .. ``mfcc_19``,
        ``spectral_centroid``, ``spectral_rolloff``, ``spectral_flux``, one
        ``spectral_contrast_<i>`` per contrast band, ``pitch_mean`` and
        ``zero_crossings``. Every value is a mean over time.
    """
    # Load the audio file
    y, sr = librosa.load(file_path, sr=None, dtype=np.float32)

    # Extract MFCCs (mean of each coefficient over time)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    mfccs_mean = pd.Series(mfccs.mean(axis=1), index=[f'mfcc_{i}' for i in range(mfccs.shape[0])])

    # Extract Spectral Features
    spectral_centroids = pd.Series(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)), index=['spectral_centroid'])
    spectral_rolloff = pd.Series(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)), index=['spectral_rolloff'])
    # Stored under the 'spectral_flux' key, but computed as mean onset strength.
    spectral_flux = pd.Series(np.mean(librosa.onset.onset_strength(y=y, sr=sr)), index=['spectral_flux'])

    # Compute the STFT-based contrast once and reuse it for both the values
    # and the index labels.
    contrast = librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr)
    spectral_contrast = pd.Series(
        np.mean(contrast, axis=1),
        index=[f'spectral_contrast_{i}' for i in range(contrast.shape[0])],
    )

    # Extract Pitch: average only the non-zero pitch estimates.
    pitches, _ = librosa.piptrack(y=y, sr=sr)
    nonzero_pitches = pitches[pitches != 0]
    # With no pitch detected, the mean of an empty selection would be NaN
    # (plus a RuntimeWarning); fall back to 0.0 instead.
    pitch_value = np.mean(nonzero_pitches) if nonzero_pitches.size else 0.0
    pitch_mean = pd.Series(pitch_value, index=['pitch_mean'])

    # Extract Zero Crossings (mean zero-crossing rate)
    zero_crossings = pd.Series(np.mean(librosa.feature.zero_crossing_rate(y)), index=['zero_crossings'])

    # Combine all features into a single Series
    features = pd.concat([mfccs_mean, spectral_centroids, spectral_rolloff, spectral_flux, spectral_contrast, pitch_mean, zero_crossings])
    return features
|
234 |
+
|
235 |
+
|
236 |
+
@app.post("/mlp")
async def handle_audio(file: UploadFile = File(...)):
    """Classify an uploaded audio clip with the pre-loaded MLP pipeline.

    The upload is written to a uniquely named temporary file under ``app/``,
    cleaned by ``preprocess_audio``, summarised by ``extract_features`` and
    then passed through the loaded scaler, PCA and MLP model.

    Args:
        file: Uploaded audio; its content type must be ``audio/mpeg``,
            ``audio/mp3`` or ``audio/wav``.

    Returns:
        A dict with a status ``message`` and the model's ``prediction`` as a
        list.

    Raises:
        HTTPException: 400 for an unsupported content type; 500 carrying the
            error text for any failure while processing the file.
    """
    # Validate outside the try block so this 400 is not caught by the generic
    # handler below and turned into a 500.
    if file.content_type not in ["audio/mpeg", "audio/wav", "audio/mp3"]:
        raise HTTPException(status_code=400, detail="Invalid file type. Please upload an MP3 or WAV file.")

    stem = uuid4().hex
    temp_filename = f"app/{stem}.mp3"
    # preprocess_audio writes its result to "<save_dir>/<input stem>.wav".
    processed_filename = os.path.join('app', f"{stem}.wav")

    try:
        # Read the file's content and persist it so librosa can open it by path
        contents = await file.read()
        with open(temp_filename, "wb") as f:
            f.write(contents)

        preprocess_audio(temp_filename, 'app')

        # Use the preprocessed audio; previously the cleaned file was written
        # but never read.
        # NOTE(review): assumes the model was trained on features taken from
        # preprocessed audio — confirm against the training pipeline.
        features = extract_features(processed_filename)
        print("Extracted Features:", features)

        # The estimators expect a 2-D (n_samples, n_features) array, so turn
        # the 1-D feature Series into a single-row matrix.
        sample = features.to_numpy(dtype=float).reshape(1, -1)

        # Apply the already-fitted transformers. fit_transform would re-fit
        # them on this one sample and throw away the training statistics.
        sample = mlp_scaler.transform(sample)
        sample = mlp_pca.transform(sample)

        results = mlp_model.predict(sample)

        return {"message": "File processed successfully", "prediction": results.tolist()}
    except HTTPException:
        # Let deliberate HTTP errors through unchanged.
        raise
    except Exception as e:
        # Handle possible exceptions
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Always clean up both the upload and the preprocessed copy, on
        # success and on failure alike.
        for leftover in (temp_filename, processed_filename):
            try:
                os.remove(leftover)
            except FileNotFoundError:
                pass
|
270 |
+
|
271 |
# if __name__ == "__main__":
|
272 |
# uvicorn.run("main:app", host="0.0.0.0", port=8080, reload=False)
|
app/mlp_model.pkl
ADDED
Binary file (210 kB). View file
|
|
app/pca.pkl
ADDED
Binary file (34.5 kB). View file
|
|
app/scaler.pkl
ADDED
Binary file (378 kB). View file
|
|
requirements.txt
CHANGED
@@ -10,4 +10,8 @@ pymongo>=3.11
|
|
10 |
tiktoken==0.6.0
|
11 |
langchain-openai==0.0.8
|
12 |
python-dotenv
|
13 |
-
upstash-redis
|
|
|
|
|
|
|
|
|
|
10 |
tiktoken==0.6.0
|
11 |
langchain-openai==0.0.8
|
12 |
python-dotenv
|
13 |
+
upstash-redis
|
14 |
+
librosa
|
15 |
+
soundfile
|
16 |
+
opensmile
|
17 |
+
eyeD3
|