deveix committed on
Commit bbf7597 · 1 Parent(s): 886c1e1
app/label_encoder.pkl ADDED
Binary file (2.15 kB)
 
app/main.py CHANGED
@@ -1,4 +1,4 @@
- from fastapi import FastAPI, HTTPException, Header, Depends
+ from fastapi import FastAPI, File, UploadFile, HTTPException, Depends
  from pydantic import BaseModel
  import os
  from pymongo import MongoClient
@@ -7,6 +7,25 @@ from langchain_community.vectorstores import MongoDBAtlasVectorSearch
  import uvicorn
  from dotenv import load_dotenv
  from fastapi.middleware.cors import CORSMiddleware
+ from uuid import uuid4
+
+ from typing import Optional
+ import joblib
+ import librosa
+ import numpy as np
+
+ import pandas as pd
+ import numpy as np
+ import matplotlib.pylab as plt
+ import seaborn as sns
+
+ from glob import glob
+ import librosa.display
+ import IPython.display as ipd
+ import soundfile as sf
+
+ from itertools import cycle
+

  load_dotenv()

@@ -127,5 +146,127 @@ async def get_answer(item: Item, token: str = Depends(verify_token)):
          # If there's an error, return a 500 error with the error's details
          raise HTTPException(status_code=500, detail=str(e))

+ # mlp
+ mlp_model = joblib.load('app/mlp_model.pkl')
+ mlp_pca = joblib.load('app/pca.pkl')
+ mlp_scaler = joblib.load('app/scaler.pkl')
+ mlp_label_encoder = joblib.load('app/label_encoder.pkl')
+
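+ # The four .pkl artifacts above are the binary files added in this commit; they are
+ # presumably fitted offline on the training data, and joblib.load only deserializes
+ # them here so the endpoint below can reuse them for inference.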
+ def preprocess_audio(path, save_dir):
+     y, sr = librosa.load(path)
+
+     # remove silence
+     intervals = librosa.effects.split(y, top_db=20)
+     # Concatenate non-silent intervals
+     y_no_gaps = np.concatenate([y[start:end] for start, end in intervals])
+
+     file_name_without_extension = os.path.basename(path).split('.')[0]
+     y_trimmed, _ = librosa.effects.trim(y_no_gaps, top_db=20)
+     D = librosa.stft(y)
+     S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
+     S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128*2)
+     S_db_mel = librosa.amplitude_to_db(np.abs(S), ref=np.max)
+
+     # Apply noise reduction (example using spectral subtraction)
+     y_denoised = librosa.effects.preemphasis(y_trimmed)
+
+     # Apply dynamic range compression
+     y_compressed = librosa.effects.preemphasis(y_denoised)
+
+     # Augmentation (example of time stretching)
+     # y_stretched = librosa.effects.time_stretch(y_compressed, rate=1.2)
+
+     # Silence Removal
+     y_silence_removed, _ = librosa.effects.trim(y_compressed)
+
+     # Equalization (example: apply high-pass filter)
+     y_equalized = librosa.effects.preemphasis(y_silence_removed)
+
+     # Define target sample rate
+     target_sr = sr
+
+     # # Data Augmentation (example: pitch shifting)
+     # y_pitch_shifted = librosa.effects.pitch_shift(y_normalized, sr=target_sr, n_steps=2)
+
+
+     # Split audio into non-silent intervals
+
+
+     # Normalize the audio signal
+     y_normalized = librosa.util.normalize(y_equalized)
+
+     # Feature Extraction (example: MFCCs)
+     # mfccs = librosa.feature.mfcc(y=y_normalized, sr=target_sr, n_mfcc=20)
+
+
+     output_file_path = os.path.join(save_dir, f"{file_name_without_extension}.wav")
+
+     # Write the audio data to the output file in .wav format
+     sf.write(output_file_path, y_normalized, target_sr)
+
+     return 'success'
+
+
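+ # Note: preprocess_audio writes its cleaned-up .wav into save_dir, but the /mlp
+ # endpoint below still calls extract_features on the original uploaded file rather
+ # than on this preprocessed copy.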
+ def extract_features(file_path):
+     # Load the audio file
+     y, sr = librosa.load(file_path, sr=None, dtype=np.float32)
+
+     # Extract MFCCs
+     mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
+     mfccs_mean = pd.Series(mfccs.mean(axis=1), index=[f'mfcc_{i}' for i in range(mfccs.shape[0])])
+
+     # Extract Spectral Features
+     spectral_centroids = pd.Series(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)), index=['spectral_centroid'])
+     spectral_rolloff = pd.Series(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)), index=['spectral_rolloff'])
+     spectral_flux = pd.Series(np.mean(librosa.onset.onset_strength(y=y, sr=sr)), index=['spectral_flux'])
+     spectral_contrast = pd.Series(np.mean(librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr), axis=1), index=[f'spectral_contrast_{i}' for i in range(librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr).shape[0])])
+
+     # Extract Pitch
+     pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
+     pitch_mean = pd.Series(np.mean(pitches[pitches != 0]), index=['pitch_mean'])  # Average only non-zero values
+
+     # Extract Zero Crossings
+     zero_crossings = pd.Series(np.mean(librosa.feature.zero_crossing_rate(y)), index=['zero_crossings'])
+
+     # Combine all features into a single Series
+     features = pd.concat([mfccs_mean, spectral_centroids, spectral_rolloff, spectral_flux, spectral_contrast, pitch_mean, zero_crossings])
+     return features
+
+
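+ # The returned Series holds the 20 MFCC means plus spectral centroid, rolloff, flux,
+ # pitch and zero-crossing means and the spectral-contrast bands (7 with librosa's
+ # default n_bands=6), i.e. 32 values per clip, which the endpoint below feeds through
+ # the scaler and PCA before the MLP.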
+ @app.post("/mlp")
+ async def handle_audio(file: UploadFile = File(...)):
+     try:
+         # Ensure the upload is a supported audio type
+         if file.content_type not in ["audio/mpeg", "audio/wav", "audio/mp3"]:
+             raise HTTPException(status_code=400, detail="Invalid file type. Please upload an MP3 or WAV file.")
+
+         # Read the file's content
+         contents = await file.read()
+         temp_filename = f"app/{uuid4().hex}.mp3"
+
+         # Save file to a temporary file if needed or process directly from memory
+         with open(temp_filename, "wb") as f:
+             f.write(contents)
+
+         preprocess_audio(temp_filename, 'app')
+
+         # Extract the hand-crafted audio features
+         features = extract_features(temp_filename)
+         print("Extracted Features:", features)
+
+         # Apply the pre-fitted scaler and PCA; transform (not fit_transform) so the
+         # parameters learned during training are reused rather than refitted on one sample
+         features = mlp_scaler.transform(features.values.reshape(1, -1))
+         features = mlp_pca.transform(features)
+
+         # Run inference with the MLP classifier
+         results = mlp_model.predict(features)
+
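+         # mlp_label_encoder is loaded above but not applied here, so the response
+         # carries the model's raw predicted labels; mapping them back with
+         # mlp_label_encoder.inverse_transform(results) is presumably the intended step.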
+         # Clean up (optional, especially if dealing with large files or sensitive data)
+         os.remove(temp_filename)
+
+         return {"message": "File processed successfully", "prediction": results.tolist()}
+     except Exception as e:
+         # Handle possible exceptions
+         raise HTTPException(status_code=500, detail=str(e))
+
  # if __name__ == "__main__":
  #     uvicorn.run("main:app", host="0.0.0.0", port=8080, reload=False)
app/mlp_model.pkl ADDED
Binary file (210 kB)
 
app/pca.pkl ADDED
Binary file (34.5 kB)
 
app/scaler.pkl ADDED
Binary file (378 kB)
 
requirements.txt CHANGED
@@ -10,4 +10,8 @@ pymongo>=3.11
  tiktoken==0.6.0
  langchain-openai==0.0.8
  python-dotenv
- upstash-redis
+ upstash-redis
+ librosa
+ soundfile
+ opensmile
+ eyeD3