Mohssinibra committed
Commit 0a5cfb8 · verified · 1 Parent(s): 62d8bca

Update app.py

Files changed (1)
app.py +21 -11
app.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 import librosa
 import numpy as np
+import soundfile as sf
 from sklearn.preprocessing import StandardScaler
 from sklearn.cluster import KMeans
 from transformers import pipeline
@@ -31,22 +32,31 @@ def process_audio(audio_path):
         speaker_labels = kmeans.fit_predict(mfccs_scaled)
         print(f"Clustering finished, {len(set(speaker_labels))} speakers detected.")

-        # Segmentation and transcription
-        transcriptions = []
+        # Group the audio segments by speaker
+        speaker_audio = {speaker: [] for speaker in set(speaker_labels)}
         segment_duration = len(audio) // len(speaker_labels)

-        print("Starting transcription...")
-        for i in range(0, len(audio), sr * 5):
-            segment = audio[i : i + sr * 5]
-            if len(segment) < sr:
+        for i in range(len(speaker_labels)):
+            start = i * segment_duration
+            end = start + segment_duration
+            speaker_id = speaker_labels[i]
+            speaker_audio[speaker_id].extend(audio[start:end])
+
+        # Transcribe the merged segments
+        result = []
+        for speaker, audio_segment in speaker_audio.items():
+            if len(audio_segment) == 0:
                 continue

-            transcription = stt_pipeline(segment)  # Transcription
-            transcriptions.append(f"Speaker {speaker_labels[i // segment_duration]}: {transcription['text']}")
-            print(f"Segment {i // sr}-{(i + sr * 5) // sr}s transcribed.")
+            temp_filename = f"temp_speaker_{speaker}.wav"
+            sf.write(temp_filename, np.array(audio_segment), sr)  # Save the segment
+
+            transcription = stt_pipeline(temp_filename)  # Transcribe
+            result.append(f"Speaker {speaker}: {transcription['text']}")

-        print("Transcription finished!")
-        return "\n".join(transcriptions)
+            print(f"Transcription for Speaker {speaker} finished.")
+
+        return "\n".join(result)

     except Exception as e:
         print(f"Error: {e}")
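For reference, here is a minimal self-contained sketch of the flow this commit moves to: cluster MFCC frames with KMeans, pool each speaker's samples, write them to a temporary wav with soundfile, and transcribe the merged audio in one pass. The Whisper model name, the transcribe_by_speaker wrapper, and the n_speakers parameter are illustrative assumptions, not part of this commit; only the grouping and transcription logic mirrors the diff.

import librosa
import numpy as np
import soundfile as sf
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from transformers import pipeline

# Assumption: any transformers ASR checkpoint works here; whisper-tiny keeps the sketch light.
stt_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")

def transcribe_by_speaker(audio_path, n_speakers=2):
    audio, sr = librosa.load(audio_path, sr=16000)

    # Cluster MFCC frames into speakers, as in the diff.
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13).T  # (frames, 13)
    mfccs_scaled = StandardScaler().fit_transform(mfccs)
    speaker_labels = KMeans(n_clusters=n_speakers, n_init=10).fit_predict(mfccs_scaled)

    # Map each frame-level label onto an equal slice of the raw samples,
    # then pool all slices belonging to the same speaker.
    segment_duration = len(audio) // len(speaker_labels)
    speaker_audio = {speaker: [] for speaker in set(speaker_labels)}
    for i, speaker_id in enumerate(speaker_labels):
        start = i * segment_duration
        speaker_audio[speaker_id].extend(audio[start:start + segment_duration])

    # Transcribe each speaker's merged audio once, instead of every 5 s chunk.
    result = []
    for speaker, samples in speaker_audio.items():
        if not samples:
            continue
        temp_filename = f"temp_speaker_{speaker}.wav"
        sf.write(temp_filename, np.array(samples), sr)
        transcription = stt_pipeline(temp_filename)
        result.append(f"Speaker {speaker}: {transcription['text']}")
    return "\n".join(result)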