Mohssinibra committed
Commit 0a5cfb8 · verified · 1 Parent(s): 62d8bca

Update app.py

Files changed (1)
app.py +21 -11
app.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 import librosa
 import numpy as np
+import soundfile as sf
 from sklearn.preprocessing import StandardScaler
 from sklearn.cluster import KMeans
 from transformers import pipeline
@@ -31,22 +32,31 @@ def process_audio(audio_path):
         speaker_labels = kmeans.fit_predict(mfccs_scaled)
         print(f"Clustering finished, {len(set(speaker_labels))} speakers detected.")

-        # Segmentation and transcription
-        transcriptions = []
+        # Group the audio segments by speaker
+        speaker_audio = {speaker: [] for speaker in set(speaker_labels)}
         segment_duration = len(audio) // len(speaker_labels)

-        print("Starting transcription...")
-        for i in range(0, len(audio), sr * 5):
-            segment = audio[i : i + sr * 5]
-            if len(segment) < sr:
+        for i in range(len(speaker_labels)):
+            start = i * segment_duration
+            end = start + segment_duration
+            speaker_id = speaker_labels[i]
+            speaker_audio[speaker_id].extend(audio[start:end])
+
+        # Transcribe the merged segments
+        result = []
+        for speaker, audio_segment in speaker_audio.items():
+            if len(audio_segment) == 0:
                 continue

-            transcription = stt_pipeline(segment)  # Transcription
-            transcriptions.append(f"Speaker {speaker_labels[i // segment_duration]}: {transcription['text']}")
-            print(f"Segment {i // sr}-{(i + sr * 5) // sr}s transcribed.")
+            temp_filename = f"temp_speaker_{speaker}.wav"
+            sf.write(temp_filename, np.array(audio_segment), sr)  # Save the segment
+
+            transcription = stt_pipeline(temp_filename)  # Transcribe
+            result.append(f"Speaker {speaker}: {transcription['text']}")

-        print("Transcription finished!")
-        return "\n".join(transcriptions)
+            print(f"Transcription for Speaker {speaker} finished.")
+
+        return "\n".join(result)

     except Exception as e:
         print(f"Error: {e}")
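For reference, here is a minimal self-contained sketch of the flow this commit moves to: cluster MFCC frames with KMeans, pool each speaker's samples, write them to a temporary wav with soundfile, and transcribe the merged audio in one pass. The Whisper model name, the transcribe_by_speaker wrapper, and the n_speakers parameter are illustrative assumptions, not part of this commit; only the grouping and transcription logic mirrors the diff.

import librosa
import numpy as np
import soundfile as sf
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from transformers import pipeline

# Assumption: any transformers ASR checkpoint works here; whisper-tiny keeps the sketch light.
stt_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")

def transcribe_by_speaker(audio_path, n_speakers=2):
    audio, sr = librosa.load(audio_path, sr=16000)

    # Cluster MFCC frames into speakers, as in the diff.
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13).T  # (frames, 13)
    mfccs_scaled = StandardScaler().fit_transform(mfccs)
    speaker_labels = KMeans(n_clusters=n_speakers, n_init=10).fit_predict(mfccs_scaled)

    # Map each frame-level label onto an equal slice of the raw samples,
    # then pool all slices belonging to the same speaker.
    segment_duration = len(audio) // len(speaker_labels)
    speaker_audio = {speaker: [] for speaker in set(speaker_labels)}
    for i, speaker_id in enumerate(speaker_labels):
        start = i * segment_duration
        speaker_audio[speaker_id].extend(audio[start:start + segment_duration])

    # Transcribe each speaker's merged audio once, instead of every 5 s chunk.
    result = []
    for speaker, samples in speaker_audio.items():
        if not samples:
            continue
        temp_filename = f"temp_speaker_{speaker}.wav"
        sf.write(temp_filename, np.array(samples), sr)
        transcription = stt_pipeline(temp_filename)
        result.append(f"Speaker {speaker}: {transcription['text']}")
    return "\n".join(result)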