Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
import librosa
|
3 |
import numpy as np
|
|
|
4 |
from sklearn.preprocessing import StandardScaler
|
5 |
from sklearn.cluster import KMeans
|
6 |
from transformers import pipeline
|
@@ -31,22 +32,31 @@ def process_audio(audio_path):
|
|
31 |
speaker_labels = kmeans.fit_predict(mfccs_scaled)
|
32 |
print(f"Clustering terminé, {len(set(speaker_labels))} locuteurs détectés.")
|
33 |
|
34 |
-
#
|
35 |
-
|
36 |
segment_duration = len(audio) // len(speaker_labels)
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
-
|
45 |
-
transcriptions.append(f"Speaker {speaker_labels[i // segment_duration]}: {transcription['text']}")
|
46 |
-
print(f"Segment {i // sr}-{(i + sr * 5) // sr}s transcrit.")
|
47 |
|
48 |
-
|
49 |
-
return "\n".join(transcriptions)
|
50 |
|
51 |
except Exception as e:
|
52 |
print(f"Erreur : {e}")
|
|
|
1 |
import gradio as gr
|
2 |
import librosa
|
3 |
import numpy as np
|
4 |
+
import soundfile as sf
|
5 |
from sklearn.preprocessing import StandardScaler
|
6 |
from sklearn.cluster import KMeans
|
7 |
from transformers import pipeline
|
|
|
32 |
speaker_labels = kmeans.fit_predict(mfccs_scaled)
|
33 |
print(f"Clustering terminé, {len(set(speaker_labels))} locuteurs détectés.")
|
34 |
|
35 |
+
# Regrouper les segments audio par speaker
|
36 |
+
speaker_audio = {speaker: [] for speaker in set(speaker_labels)}
|
37 |
segment_duration = len(audio) // len(speaker_labels)
|
38 |
|
39 |
+
for i in range(len(speaker_labels)):
|
40 |
+
start = i * segment_duration
|
41 |
+
end = start + segment_duration
|
42 |
+
speaker_id = speaker_labels[i]
|
43 |
+
speaker_audio[speaker_id].extend(audio[start:end])
|
44 |
+
|
45 |
+
# Transcrire les segments fusionnés
|
46 |
+
result = []
|
47 |
+
for speaker, audio_segment in speaker_audio.items():
|
48 |
+
if len(audio_segment) == 0:
|
49 |
continue
|
50 |
+
|
51 |
+
temp_filename = f"temp_speaker_{speaker}.wav"
|
52 |
+
sf.write(temp_filename, np.array(audio_segment), sr) # Sauvegarder le segment
|
53 |
+
|
54 |
+
transcription = stt_pipeline(temp_filename) # Transcrire
|
55 |
+
result.append(f"Speaker {speaker}: {transcription['text']}")
|
56 |
|
57 |
+
print(f"Transcription Speaker {speaker} terminée.")
|
|
|
|
|
58 |
|
59 |
+
return "\n".join(result)
|
|
|
60 |
|
61 |
except Exception as e:
|
62 |
print(f"Erreur : {e}")
|