Spaces:
Sleeping
Sleeping
deveix
committed on
Commit
·
3dddc6f
1
Parent(s):
491a059
change models
Browse files - app/main.py +57 -40
app/main.py
CHANGED
|
@@ -182,66 +182,83 @@ async def get_answer(item: Item, token: str = Depends(verify_token)):
|
|
| 182 |
raise HTTPException(status_code=500, detail=str(e))
|
| 183 |
|
| 184 |
# random forest
|
| 185 |
-
model = joblib.load('app/
|
| 186 |
pca = joblib.load('app/pca.pkl')
|
| 187 |
-
scaler = joblib.load('app/
|
| 188 |
-
label_encoder = joblib.load('app/
|
| 189 |
|
| 190 |
def preprocess_audio(audio_data, rate):
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
# audio_data = np.concatenate([audio_data[start:end] for start, end in intervals])
|
| 196 |
|
| 197 |
-
|
| 198 |
audio_data, _ = librosa.effects.trim(audio_data)
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
#
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
|
| 208 |
-
|
| 209 |
-
# y_denoised = librosa.effects.preemphasis(y_trimmed)
|
| 210 |
|
| 211 |
-
# # Apply dynamic range compression
|
| 212 |
-
# y_compressed = librosa.effects.preemphasis(y_denoised)
|
| 213 |
|
| 214 |
-
# # Augmentation (example of time stretching)
|
| 215 |
-
# # y_stretched = librosa.effects.time_stretch(y_compressed, rate=1.2)
|
| 216 |
|
| 217 |
-
# # Silence Removal
|
| 218 |
-
# y_silence_removed, _ = librosa.effects.trim(y_compressed)
|
| 219 |
|
| 220 |
-
# # Equalization (example: apply high-pass filter)
|
| 221 |
-
# y_equalized = librosa.effects.preemphasis(y_silence_removed)
|
| 222 |
|
| 223 |
-
# # Define target sample rate
|
| 224 |
-
# target_sr = sr
|
| 225 |
|
| 226 |
-
# # Data Augmentation (example: pitch shifting)
|
| 227 |
-
# y_pitch_shifted = librosa.effects.pitch_shift(y_normalized, sr=target_sr, n_steps=2)
|
| 228 |
|
| 229 |
|
| 230 |
-
|
| 231 |
|
| 232 |
|
| 233 |
-
|
| 234 |
-
|
| 235 |
|
| 236 |
-
|
| 237 |
-
# mfccs = librosa.feature.mfcc(y=y_normalized, sr=target_sr, n_mfcc=20)
|
| 238 |
|
| 239 |
-
|
| 240 |
|
| 241 |
-
|
| 242 |
-
|
| 243 |
|
| 244 |
-
|
| 245 |
|
| 246 |
# smile = opensmile.Smile(
|
| 247 |
# feature_set=opensmile.FeatureSet.ComParE_2016,
|
|
|
|
| 182 |
raise HTTPException(status_code=500, detail=str(e))
|
| 183 |
|
| 184 |
# random forest
|
| 185 |
+
model = joblib.load('app/1713696933.326759_trained_model.joblib')
|
| 186 |
pca = joblib.load('app/pca.pkl')
|
| 187 |
+
scaler = joblib.load('app/1713696947.894978_scaler.joblib')
|
| 188 |
+
label_encoder = joblib.load('app/1713696954.9487948_label_encoder.joblib')
|
| 189 |
|
| 190 |
def preprocess_audio(audio_data, rate):
|
| 191 |
+
# Resample first if the target rate is lower to reduce data size for subsequent operations
|
| 192 |
+
if rate > default_sample_rate:
|
| 193 |
+
audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
|
| 194 |
+
rate = default_sample_rate
|
|
|
|
| 195 |
|
| 196 |
+
# Trim silence before applying computationally expensive noise reduction
|
| 197 |
audio_data, _ = librosa.effects.trim(audio_data)
|
| 198 |
+
|
| 199 |
+
# Normalize the audio data
|
| 200 |
+
audio_data = librosa.util.normalize(audio_data)
|
| 201 |
+
|
| 202 |
+
# Apply noise reduction
|
| 203 |
+
audio_data = nr.reduce_noise(y=audio_data, sr=rate)
|
| 204 |
+
|
| 205 |
+
return audio_data, rate
|
| 206 |
+
|
| 207 |
+
# def preprocess_audio(audio_data, rate):
|
| 208 |
+
# audio_data = nr.reduce_noise(y=audio_data, sr=rate)
|
| 209 |
+
# # remove silence
|
| 210 |
+
# # intervals = librosa.effects.split(audio_data, top_db=20)
|
| 211 |
+
# # # Concatenate non-silent intervals
|
| 212 |
+
# # audio_data = np.concatenate([audio_data[start:end] for start, end in intervals])
|
| 213 |
+
|
| 214 |
+
# audio_data = librosa.util.normalize(audio_data)
|
| 215 |
+
# audio_data, _ = librosa.effects.trim(audio_data)
|
| 216 |
+
# audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
|
| 217 |
+
# rate = default_sample_rate
|
| 218 |
+
|
| 219 |
+
# # y_trimmed, _ = librosa.effects.trim(y_no_gaps, top_db = 20)
|
| 220 |
+
# # D = librosa.stft(y)
|
| 221 |
+
# # S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
|
| 222 |
+
# # S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128*2,)
|
| 223 |
+
# # S_db_mel = librosa.amplitude_to_db(np.abs(S), ref=np.max)
|
| 224 |
|
| 225 |
+
# # Apply noise reduction (example using spectral subtraction)
|
| 226 |
+
# # y_denoised = librosa.effects.preemphasis(y_trimmed)
|
| 227 |
|
| 228 |
+
# # # Apply dynamic range compression
|
| 229 |
+
# # y_compressed = librosa.effects.preemphasis(y_denoised)
|
| 230 |
|
| 231 |
+
# # # Augmentation (example of time stretching)
|
| 232 |
+
# # # y_stretched = librosa.effects.time_stretch(y_compressed, rate=1.2)
|
| 233 |
|
| 234 |
+
# # # Silence Removal
|
| 235 |
+
# # y_silence_removed, _ = librosa.effects.trim(y_compressed)
|
| 236 |
|
| 237 |
+
# # # Equalization (example: apply high-pass filter)
|
| 238 |
+
# # y_equalized = librosa.effects.preemphasis(y_silence_removed)
|
| 239 |
|
| 240 |
+
# # # Define target sample rate
|
| 241 |
+
# # target_sr = sr
|
| 242 |
|
| 243 |
+
# # # Data Augmentation (example: pitch shifting)
|
| 244 |
+
# # y_pitch_shifted = librosa.effects.pitch_shift(y_normalized, sr=target_sr, n_steps=2)
|
| 245 |
|
| 246 |
|
| 247 |
+
# # Split audio into non-silent intervals
|
| 248 |
|
| 249 |
|
| 250 |
+
# # Normalize the audio signal
|
| 251 |
+
# # y_normalized = librosa.util.normalize(y_equalized)
|
| 252 |
|
| 253 |
+
# # Feature Extraction (example: MFCCs)
|
| 254 |
+
# # mfccs = librosa.feature.mfcc(y=y_normalized, sr=target_sr, n_mfcc=20)
|
| 255 |
|
| 256 |
+
# # output_file_path = os.path.join(save_dir, f"{file_name_without_extension}.{extension}")
|
| 257 |
|
| 258 |
+
# # Write the audio data to the output file in .wav format
|
| 259 |
+
# # sf.write(path, y_normalized, target_sr)
|
| 260 |
|
| 261 |
+
# return audio_data, rate
|
| 262 |
|
| 263 |
# smile = opensmile.Smile(
|
| 264 |
# feature_set=opensmile.FeatureSet.ComParE_2016,
|