Mohssinibra committed
Commit 2ee2f68 · verified · 1 Parent(s): f276524

Update app.py

Files changed (1): app.py (+8, -3)
app.py CHANGED
@@ -5,6 +5,7 @@ import soundfile as sf
 from sklearn.preprocessing import StandardScaler
 from sklearn.cluster import KMeans
 from transformers import pipeline
+import noisereduce as nr  # Added library for noise reduction
 
 print("Loading the Wav2Vec2 model...")
 stt_pipeline = pipeline("automatic-speech-recognition", model="boumehdi/wav2vec2-large-xlsr-moroccan-darija")
@@ -18,8 +19,12 @@ def process_audio(audio_path):
     audio, sr = librosa.load(audio_path, sr=None, duration=30)
     print(f"Audio loaded: {len(audio)} samples at {sr} Hz")
 
+    # Noise reduction (if needed)
+    audio_denoised = nr.reduce_noise(y=audio, sr=sr)
+    print("Noise reduced.")
+
     # Extract the MFCCs
-    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
+    mfccs = librosa.feature.mfcc(y=audio_denoised, sr=sr, n_mfcc=13)
     print(f"MFCCs extracted, shape: {mfccs.shape}")
 
     # Normalization
@@ -34,13 +39,13 @@ def process_audio(audio_path):
 
     # Group the audio segments by speaker
     speaker_audio = {speaker: [] for speaker in set(speaker_labels)}
-    segment_duration = len(audio) // len(speaker_labels)
+    segment_duration = len(audio_denoised) // len(speaker_labels)
 
     for i in range(len(speaker_labels)):
         start = i * segment_duration
         end = start + segment_duration
         speaker_id = speaker_labels[i]
-        speaker_audio[speaker_id].extend(audio[start:end])
+        speaker_audio[speaker_id].extend(audio_denoised[start:end])
 
     # Transcribe the merged segments
     result = []
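
The diff shows process_audio only in fragments. The sketch below is a minimal end-to-end reconstruction of how the changed pieces plausibly fit together. The n_speakers parameter, the KMeans clustering setup, and the final transcription loop are assumptions that do not appear in the commit; only the denoising, MFCC, and segment-grouping lines come from the diff itself.

import librosa
import noisereduce as nr
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from transformers import pipeline

print("Loading the Wav2Vec2 model...")
stt_pipeline = pipeline("automatic-speech-recognition",
                        model="boumehdi/wav2vec2-large-xlsr-moroccan-darija")

def process_audio(audio_path, n_speakers=2):  # n_speakers: assumed, not in the diff
    # Load up to 30 s of audio at its native sample rate (as in the diff)
    audio, sr = librosa.load(audio_path, sr=None, duration=30)

    # New in this commit: denoise before extracting features
    audio_denoised = nr.reduce_noise(y=audio, sr=sr)

    # 13 MFCCs per frame; transpose to (n_frames, 13) for scikit-learn
    mfccs = librosa.feature.mfcc(y=audio_denoised, sr=sr, n_mfcc=13).T
    features = StandardScaler().fit_transform(mfccs)

    # Frame-level clustering as a crude diarizer (setup assumed)
    speaker_labels = KMeans(n_clusters=n_speakers, n_init=10).fit_predict(features)

    # Map each frame's label onto an equal-length slice of samples (as in the diff)
    speaker_audio = {speaker: [] for speaker in set(speaker_labels)}
    segment_duration = len(audio_denoised) // len(speaker_labels)
    for i, speaker_id in enumerate(speaker_labels):
        start = i * segment_duration
        speaker_audio[speaker_id].extend(audio_denoised[start:start + segment_duration])

    # Transcribe each speaker's merged audio; the diff stops at "result = []",
    # so this loop body is an assumption. Passing the sampling rate lets the
    # pipeline resample for the 16 kHz Wav2Vec2 checkpoint.
    result = []
    for speaker_id, samples in speaker_audio.items():
        out = stt_pipeline({"raw": np.asarray(samples, dtype=np.float32),
                            "sampling_rate": sr})
        result.append((speaker_id, out["text"]))
    return result

A note on the segment mapping: with librosa's default hop length, one MFCC frame covers roughly 512 samples, so len(audio_denoised) // len(speaker_labels) approximates that hop and the i-th label is mapped back onto the i-th slice of samples. Because frames from different speakers interleave, each speaker's merged audio can be discontinuous, a known limitation of this frame-level KMeans diarization.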