Update app.py
app.py CHANGED
@@ -5,6 +5,7 @@ import soundfile as sf
 from sklearn.preprocessing import StandardScaler
 from sklearn.cluster import KMeans
 from transformers import pipeline
+import noisereduce as nr  # Added: noise reduction library
 
 print("Loading the Wav2Vec2 model...")
 stt_pipeline = pipeline("automatic-speech-recognition", model="boumehdi/wav2vec2-large-xlsr-moroccan-darija")
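For context, the model loaded above is served through the standard transformers ASR pipeline, which accepts an audio file path directly. A minimal usage sketch, where "sample.wav" is a hypothetical placeholder rather than a file from this Space:

# Minimal sketch: calling the same ASR pipeline on a short clip.
# "sample.wav" is a hypothetical placeholder path.
from transformers import pipeline

stt_pipeline = pipeline(
    "automatic-speech-recognition",
    model="boumehdi/wav2vec2-large-xlsr-moroccan-darija",
)
print(stt_pipeline("sample.wav")["text"])  # the pipeline returns a dict with a "text" field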
@@ -18,8 +19,12 @@ def process_audio(audio_path):
     audio, sr = librosa.load(audio_path, sr=None, duration=30)
     print(f"Audio loaded: {len(audio)} samples at {sr} Hz")
 
+    # Noise reduction (if needed)
+    audio_denoised = nr.reduce_noise(y=audio, sr=sr)
+    print("Noise reduced.")
+
     # MFCC extraction
-    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
+    mfccs = librosa.feature.mfcc(y=audio_denoised, sr=sr, n_mfcc=13)
     print(f"MFCCs extracted, shape: {mfccs.shape}")
 
     # Normalization
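A note on the new denoising step: noisereduce's reduce_noise performs spectral gating and returns an array of the same length as its input, so the MFCC frame count and the slicing logic later in the function are unaffected. A self-contained sketch, using synthetic noise as a stand-in for a real recording:

import numpy as np
import librosa
import noisereduce as nr

sr = 16000
noisy = np.random.randn(sr * 5).astype(np.float32)  # 5 s of synthetic noise as a stand-in signal

denoised = nr.reduce_noise(y=noisy, sr=sr)
assert len(denoised) == len(noisy)  # length is preserved, so downstream slicing is unchanged

mfccs = librosa.feature.mfcc(y=denoised, sr=sr, n_mfcc=13)
print(mfccs.shape)  # (13, n_frames)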
@@ -34,13 +39,13 @@ def process_audio(audio_path):
 
     # Group the audio segments by speaker
     speaker_audio = {speaker: [] for speaker in set(speaker_labels)}
-    segment_duration = len(audio) // len(speaker_labels)
+    segment_duration = len(audio_denoised) // len(speaker_labels)
 
     for i in range(len(speaker_labels)):
         start = i * segment_duration
         end = start + segment_duration
         speaker_id = speaker_labels[i]
-        speaker_audio[speaker_id].extend(audio[start:end])
+        speaker_audio[speaker_id].extend(audio_denoised[start:end])
 
     # Transcribe the merged segments
     result = []
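The context lines above assume speaker_labels already exists earlier in process_audio. Given the StandardScaler and KMeans imports at the top of the file, here is a plausible sketch of how such per-frame labels could be produced; the n_clusters=2 value and the synthetic mfccs array are assumptions, not part of this commit:

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# Hypothetical: cluster MFCC frames into two speakers.
rng = np.random.default_rng(0)
mfccs = rng.normal(size=(13, 200))  # stand-in MFCCs, shape (n_mfcc, n_frames)

features = StandardScaler().fit_transform(mfccs.T)  # KMeans expects (n_samples, n_features)
speaker_labels = KMeans(n_clusters=2, n_init=10, random_state=0).fit_predict(features)
print(speaker_labels[:10])  # one cluster label per MFCC frame

With labels of that shape, segment_duration = len(audio_denoised) // len(speaker_labels) slices the waveform into one uniform chunk per frame label; the integer division silently drops up to len(speaker_labels) - 1 trailing samples.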