Cyr-CK committed
Commit 233adb5 · 1 Parent(s): cfd1552

Updated app

__init__.py ADDED
File without changes
app.py CHANGED
@@ -1,6 +1,7 @@
 import streamlit as st
 from streamlit_option_menu import option_menu
-from views.application import application
+from views.studio import studio
+from views.emotion_analysis import emotion_analysis
 from views.about import about
 
 if "model_loaded" not in st.session_state:
@@ -17,8 +18,8 @@ with st.sidebar:
     # Navigation menu with icons
     selected_tab = option_menu(
         menu_title=None,  # Added menu_title parameter
-        options=["Application", "About"],
-        icons=["robot", "bar-chart", "robot"],
+        options=["Studio", "Emotion Analysis", "About"],
+        icons=["record-circle", "robot", "info-circle"],
         menu_icon="cast",
         default_index=0,
         # styles={
@@ -30,8 +31,10 @@ with st.sidebar:
     )
 
 
-if selected_tab == "Application":
-    application()
+if selected_tab == "Studio":
+    studio()
+elif selected_tab == "Emotion Analysis":
+    emotion_analysis()
 elif selected_tab == "About":
     about()
 
src/__init__.py ADDED
File without changes
src/config.py CHANGED
@@ -11,6 +11,7 @@ if not HF_API_KEY:
 
 # Emotion labels
 LABELS = {"colere": 0, "neutre": 1, "joie": 2}
+LABELS = ["colere", "neutre", "joie"]
 NUM_LABELS = len(LABELS)
 
 # Choose the device
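Note that the added list assignment immediately shadows the dict on the line above, so `LABELS` ends up as a plain list (which is what the refactored `src/predict.py` indexes into) and `NUM_LABELS` is still 3. If both the name-to-index mapping and the ordered names are wanted, a minimal sketch of one way to keep them in sync (hypothetical refactor, not part of the commit):

    # Hypothetical refactor: derive the ordered name list from the dict.
    LABELS_MAP = {"colere": 0, "neutre": 1, "joie": 2}  # name -> index
    LABELS = list(LABELS_MAP)                           # index -> name, insertion-ordered
    NUM_LABELS = len(LABELS)                            # 3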
src/model/__init__.py CHANGED
@@ -1 +0,0 @@
-
src/model/emotion_classifier.py CHANGED
@@ -1,15 +1,32 @@
+import torch
 import torch.nn as nn
 
+# Predicts roughly 33% everywhere (in the 3-class case)
+
+# class EmotionClassifier(nn.Module):
+#     def __init__(self, feature_dim, num_labels):
+#         super(EmotionClassifier, self).__init__()
+#         self.fc1 = nn.Linear(feature_dim, 256)
+#         self.relu = nn.ReLU()
+#         self.dropout = nn.Dropout(0.3)
+#         self.fc2 = nn.Linear(256, num_labels)
+
+#     def forward(self, x):
+#         x = self.fc1(x)
+#         x = self.relu(x)
+#         x = self.dropout(x)
+#         return self.fc2(x)
+
+
+
 class EmotionClassifier(nn.Module):
-    def __init__(self, feature_dim, num_labels):
+    def __init__(self, feature_dim, num_labels=3):
         super(EmotionClassifier, self).__init__()
-        self.fc1 = nn.Linear(feature_dim, 256)
-        self.relu = nn.ReLU()
-        self.dropout = nn.Dropout(0.3)
-        self.fc2 = nn.Linear(256, num_labels)
-
-    def forward(self, x):
-        x = self.fc1(x)
-        x = self.relu(x)
-        x = self.dropout(x)
-        return self.fc2(x)
+        self.fc = nn.Linear(feature_dim.config.hidden_size, num_labels)
+        self.softmax = nn.Softmax(dim=1)
+
+    def forward(self, input_values):
+        outputs = self(input_values).last_hidden_state
+        pooled_output = torch.mean(outputs, dim=1)
+        logits = self.fc(pooled_output)
+        return self.softmax(logits)
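As committed, this new head cannot run: `forward` calls `self(input_values)`, which recursively invokes `forward` itself, and `nn.Linear(feature_dim.config.hidden_size, ...)` assumes `feature_dim` is the backbone model object, whereas `src/predict.py` passes the integer `feature_extractor.config.hidden_size` and already mean-pools the features before calling the classifier. A minimal runnable sketch of what the head appears to intend (hypothetical correction, not part of the commit):

    import torch
    import torch.nn as nn

    class EmotionClassifier(nn.Module):
        """Linear head over mean-pooled Wav2Vec2 features (hypothetical fix)."""

        def __init__(self, hidden_size: int, num_labels: int = 3):
            super().__init__()
            self.fc = nn.Linear(hidden_size, num_labels)

        def forward(self, features: torch.Tensor) -> torch.Tensor:
            # features: (batch, hidden_size), already pooled by the caller
            # (src/predict.py takes last_hidden_state.mean(dim=1)).
            # Return raw logits; predict.py applies softmax itself when it
            # needs probabilities, so baking softmax in here would apply it twice.
            return self.fc(features)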
 
 
src/model/{feature_extrator.py → feature_extractor.py} RENAMED
@@ -1,6 +1,6 @@
 import torch
 from transformers import Wav2Vec2Model, Wav2Vec2Processor
-from config import MODEL_NAME, DEVICE
+from src.config import MODEL_NAME, DEVICE
 
 processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
 feature_extractor = Wav2Vec2Model.from_pretrained(MODEL_NAME).to(DEVICE)
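With the module renamed and the imports made package-absolute (`src.config` instead of `config`), these files now assume they are executed from the repository root with `src` importable as a package, for example via `python -m src.predict`. That invocation is an assumption on our part; the commit itself does not document how the modules are meant to be run.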
src/predict.py CHANGED
@@ -1,47 +1,63 @@
 import torch
 import torchaudio
+import librosa
 import soundfile as sf
 import numpy as np
-from model.emotion_classifier import EmotionClassifier
-from model.feature_extrator import feature_extractor, processor
-from utils.preprocessing import resampler
-from config import DEVICE, LABELS
+from src.model.emotion_classifier import EmotionClassifier
+from src.model.feature_extractor import feature_extractor, processor
+from src.utils.preprocessing import resampler
+from src.config import DEVICE, LABELS
 import os
 
 
 # Load the saved model
 classifier = EmotionClassifier(feature_extractor.config.hidden_size, len(LABELS)).to(DEVICE)
-classifier.load_state_dict(torch.load(os.path.join("best_emotion_model.pth"), map_location=torch.device(DEVICE)))
+classifier.load_state_dict(torch.load(os.path.join("src", "model", "best_emotion_model.pth"), map_location=torch.device(DEVICE)), strict=False)
 classifier.eval()
 
 
 # Prediction function
-def predict_emotion(audio_path):
+def predict_emotion(speech, output_probs=False, sampling_rate=16000):
     # Load the audio
-    speech, sample_rate = sf.read(audio_path, dtype="float32")
+    # waveform, sample_rate = librosa.load(speech, sr=None)
+    # speech_audio, sample_rate = sf.read(speech, dtype="float32")
 
     # Resample if necessary
-    if sample_rate != 16000:
-        speech = torch.tensor(speech).unsqueeze(0)
-        speech = resampler(speech).squeeze(0).numpy()
+    # if sample_rate != sampling_rate:
+    #     speech = torch.tensor(speech).unsqueeze(0)
+    #     speech = resampler(speech).squeeze(0).numpy()
 
     # Extract the features
-    inputs = processor(speech, sampling_rate=16000, return_tensors="pt", padding=True)
+    inputs = processor(speech, sampling_rate=sampling_rate, return_tensors="pt", padding=True)
     input_values = inputs.input_values.to(DEVICE)
 
     with torch.no_grad():
         features = feature_extractor(input_values).last_hidden_state.mean(dim=1)
         logits = classifier(features)
 
-    # Get the prediction
-    predicted_label = torch.argmax(logits, dim=-1).item()
-    emotion = list(LABELS.keys())[predicted_label]
-
-    return emotion
+    if output_probs:
+        # Apply softmax to get probabilities
+        probabilities = torch.nn.functional.softmax(logits, dim=-1)
+
+        # Convert to a numpy array and take the first (and only) element
+        probabilities = probabilities[0].detach().cpu().numpy()
+
+        # Build a dict mapping each emotion to its probability
+        emotion_probabilities = {emotion: prob for emotion, prob in zip(LABELS, probabilities)}
+        # emotion_probabilities = {"emotions": [emotion for emotion in emotion_labels],
+        #                          "probabilities": [prob for prob in probabilities]}
+        return emotion_probabilities
+    else:
+        # Get the most probable emotion (i.e. the prediction)
+        predicted_label = torch.argmax(logits, dim=-1).item()
+        emotion = LABELS[predicted_label]
+
+        return emotion
+
 
 # Usage example
-if __name__ == "__main__":
-    base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
-    audio_file = os.path.join(base_path, "colere", "c1ac.wav")
-    emotion = predict_emotion(audio_file)
-    print(f"🎤 L'émotion prédite est : {emotion}")
+# if __name__ == "__main__":
+#     base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
+#     audio_file = os.path.join(base_path, "colere", "c1ac.wav")
+#     emotion = predict_emotion(audio_file)
+#     print(f"🎤 L'émotion prédite est : {emotion}")
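With the new signature, callers pass a raw waveform array rather than a file path; the views load the audio themselves with librosa. A minimal usage sketch, assuming a 16 kHz mono recording (the file name is illustrative):

    import librosa
    from src.predict import predict_emotion

    # Load and resample to the 16 kHz rate the Wav2Vec2 processor expects.
    audio, sr = librosa.load("recording.wav", sr=16000)

    label = predict_emotion(audio, sampling_rate=sr)                     # e.g. "joie"
    probs = predict_emotion(audio, output_probs=True, sampling_rate=sr)  # {"colere": ..., "neutre": ..., "joie": ...}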
src/utils/preprocessing.py CHANGED
@@ -2,8 +2,8 @@ import soundfile as sf
 import torch
 import torchaudio
 import numpy as np
-from model.feature_extrator import processor
-from config import DEVICE
+from src.model.feature_extractor import processor  # type: ignore
+from src.config import DEVICE
 
 # Resampler
 resampler = torchaudio.transforms.Resample(orig_freq=48_000, new_freq=16_000)
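The module-level resampler is fixed at 48 kHz in, 16 kHz out. A short sketch of how it applies to a waveform tensor (shapes assumed for illustration):

    import torch
    import torchaudio

    resampler = torchaudio.transforms.Resample(orig_freq=48_000, new_freq=16_000)

    waveform_48k = torch.randn(1, 48_000)   # (channels, samples): one second at 48 kHz
    waveform_16k = resampler(waveform_48k)  # -> shape (1, 16000)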
views/about.py CHANGED
@@ -15,7 +15,7 @@ def about():
     st.markdown("### Collaborators")
     st.write("""
     - [Falonne Kpamegan](https://github.com/marinaKpamegan)
-    - [Nancy](https://github.com/yminanc)
-    - [Cyril](https://github.com/Cyr-CK)
+    - [Nancy Randriamiarijaona](https://github.com/yminanc)
+    - [Cyril Kocab](https://github.com/Cyr-CK)
     - [Juan Alfonso](https://github.com/jdalfons)
     """)
views/application.py DELETED
@@ -1,212 +0,0 @@
-import streamlit as st
-import pandas as pd
-from st_audiorec import st_audiorec
-import datetime
-import os
-import matplotlib.pyplot as plt
-import librosa
-from src.model.transcriber import transcribe_audio
-from predict import predict_emotion
-
-
-DIRECTORY = "audios"
-FILE_NAME = "audio.wav"
-CHUNK = 1024
-# FORMAT = pyaudio.paInt16
-CHANNELS = 1
-RATE = 16000
-
-def application():
-    st.title("SISE ultimate challenge")
-    st.write("C'est le dernier challenge de la formation SISE.")
-    st.markdown("""
-    **Overview:**
-    - Analyse de logs
-    - Analyse de données
-    - Machine learning
-    """)
-
-    st.markdown("---")
-
-    tab1, tab2, tab3 = st.tabs(["⬆️ Record Audio", "🔈 Realtime Audio", "📝 Transcription"])
-
-    with tab1:
-        st.header("⬆️ Upload Audio Record")
-        st.write("Here you can upload a pre-recorded audio.")
-        audio_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "ogg"])
-
-        if audio_file is not None:
-
-            with open(os.path.join(DIRECTORY, FILE_NAME), "wb") as f:
-                f.write(audio_file.getbuffer())
-                st.success(f"Saved file: {FILE_NAME}")
-
-
-        start_inference = st.button("Start emotion recogniton", "inf_on_upl_btn")
-        emotion_labels = ["joie", "colère", "neutre"]
-        colors = ['#f6d60a', '#f71c1c', '#cac8c8']
-
-        if start_inference:
-            # Streamlit setup
-            with st.spinner("Real-time emotion analysis..."):
-                # uploaded_file = st.file_uploader("Choisissez un fichier audio", type=["wav", "mp3"])
-
-                if audio_file is not None:
-                    # Load and resample the audio
-                    audio, sr = librosa.load(audio_file, sr=RATE)
-                    # chunk = audio_file
-
-                    # Sliding-window parameters
-                    window_size = 1  # in seconds
-                    hop_length = 0.5  # in seconds
-
-                    # Create a live-updating chart
-                    fig, ax = plt.subplots()
-                    lines = [ax.plot([], [], label=emotion)[0] for emotion in emotion_labels]
-                    ax.set_ylim(0, 1)
-                    ax.set_xlim(0, len(audio) / sr)
-                    ax.set_xlabel("Temps (s)")
-                    ax.set_ylabel("Probabilité")
-                    ax.legend()
-
-                    chart = st.pyplot(fig)
-
-                    scores = [[], [], []]  # 3 emotions for now
-
-                    # Sliding-window processing
-                    for i in range(0, len(audio), int(hop_length * sr)):
-                        chunk = audio[i:i + int(window_size * sr)]
-                        if len(chunk) < int(window_size * sr):
-                            break
-
-                        emotion_scores = predict_emotion(chunk, output_probs=True, sampling_rate=RATE)
-
-                        # Update the chart
-                        for emotion, line in zip(emotion_labels, lines):
-                            xdata = list(line.get_xdata())
-                            ydata = list(line.get_ydata())
-                            xdata.append(i / sr)
-                            ydata.append(emotion_scores[emotion])
-                            scores[list(emotion_scores).index(emotion)].append(emotion_scores[emotion])
-                            line.set_data(xdata, ydata)
-
-                        ax.relim()
-                        ax.autoscale_view()
-                        chart.pyplot(fig, use_container_width=True)
-
-                    # Prepare the styling
-                    st.markdown("""
-                    <style>
-                    .colored-box {
-                        padding: 10px;
-                        border-radius: 5px;
-                        color: white;
-                        font-weight: bold;
-                        text-align: center;
-                    }
-                    </style>
-                    """
-                    , unsafe_allow_html=True)
-
-                    # Dynamically create the specified number of columns
-                    columns = st.columns(len(emotion_scores))
-
-                    # emotion_scores_mean = [sum(sublist) / len(sublist) for sublist in scores]
-                    emotion_scores_mean = {emotion: sum(sublist) / len(sublist) for emotion, sublist in zip(emotion_labels, scores)}
-                    max_emo = max(emotion_scores_mean)
-                    emotion_scores_sorted = dict(sorted(emotion_scores_mean.items(), key=lambda x: x[1], reverse=True))
-                    colors_sorted = [colors[list(emotion_scores_mean.keys()).index(key)] for key in list(emotion_scores_sorted.keys())]
-
-                    # Add content to each column
-                    for i, (col, emotion) in enumerate(zip(columns, emotion_scores_sorted)):
-                        color = colors_sorted[i % len(colors_sorted)]  # Cycle through colors if more columns than colors
-                        col.markdown(f"""
-                        <div class="colored-box" style="background-color: {color};">
-                            {emotion} : {100*emotion_scores_sorted[emotion]:.2f} %
-                        </div>
-                        """
-                        , unsafe_allow_html=True)
-
-
-
-                    st.success("Analyse terminée !")
-                else:
-                    st.warning("You need to load an audio file !")
-
-        st.subheader("Feedback")
-
-        # Initialize the CSV file
-        csv_file = os.path.join("src", "predictions", "feedback.csv")
-
-        # Create the CSV file with the appropriate columns if it does not exist
-        if not os.path.exists(csv_file):
-            df = pd.DataFrame(columns=["filepath", "prediction", "feedback"])
-            df.to_csv(csv_file, index=False)
-
-        # Load the existing CSV data
-        df = pd.read_csv(csv_file)
-
-        with st.form("feedback_form"):
-            st.write("What should have been the correct prediction ? (*Choose the same emotion if the prediction was correct*).")
-            feedback = st.selectbox("Your answer :", ['Sadness', 'Anger', 'Disgust', 'Fear', 'Surprise', 'Joy', 'Neutral'])
-            submit_button = st.form_submit_button("Submit")
-            st.write("En cliquant sur ce bouton, vous acceptez que votre audio soit sauvegardé dans notre base de données.")
-
-            if submit_button:
-                # Append the feedback to the DataFrame
-                new_entry = {"filepath": audio_file.name, "prediction": max_emo, "feedback": feedback}
-                df = df.append(new_entry, ignore_index=True)
-
-                # Save the updated data back to the CSV file
-                df.to_csv(csv_file, index=False)
-
-                # Save the audio file
-                with open(os.path.join("src", "predictions", "data"), "wb") as f:
-                    f.write(audio_file.getbuffer())
-
-                # Confirm to the user
-                st.success("Merci pour votre retour ! Vos données ont été sauvegardées.")
-
-
-
-    with tab2:
-        st.header("🔈 Realtime Audio Record")
-        st.write("Here you can record an audio.")
-
-        if st.button("Register", key="register-button"):
-            st.success("Audio registered successfully.")
-
-        audio_file = st_audiorec()
-
-        if audio_file is not None:
-            st.audio(audio_file, format='audio/wav')
-
-    with tab3:
-        st.header("📝 Speech2Text Transcription")
-        st.write("Here you can get the audio transcript.")
-
-        save = st.checkbox("Save transcription to .txt", value=False, key="save-transcript")
-
-        ############################# Uncomment once this is debugged
-        if st.button("Transcribe", key="transcribe-button"):
-            # # Function to transcribe the audio
-            # transcription = transcribe_audio(st.audio)
-
-            # # Load and transcribe the audio
-            # # audio, rate = load_audio(audio_file_path)  # (re)load the audio if needed
-            # transcription = transcribe_audio(audio_file, sampling_rate=16000)
-
-            # # Display the transcription
-            # st.write("Transcription :", transcription)
-
-            st.success("Audio registered successfully.")
-            # if save:
-            #     file_path = "transcript.txt"
-
-            #     # Write the text to the file
-            #     with open(file_path, "w") as file:
-            #         file.write(transcription)
-
-            #     st.success(f"Text saved to {file_path}")
-
views/emotion_analysis.py ADDED
@@ -0,0 +1,150 @@
+import streamlit as st
+import pandas as pd
+import os
+import matplotlib.pyplot as plt
+import librosa
+from src.predict import predict_emotion
+
+DIRECTORY = "audios"
+FILE_NAME = "audio.wav"
+RATE = 16000
+
+def emotion_analysis():
+
+    st.header("❤️ Emotion Analysis")
+
+    if st.session_state.audio_file is None:
+        st.info("Please, upload or record an audio file in the studio tab")
+        st.stop()
+    else:
+        audio_file = st.session_state.audio_file
+
+    start_inference = st.button("Start emotion recogniton", "inf_on_upl_btn")
+    emotion_labels = ["colere", "neutre", "joie"]
+    colors = ['#f71c1c', '#cac8c8', '#f6d60a']
+
+    if start_inference:
+        # Streamlit setup
+        with st.spinner("Real-time emotion analysis..."):
+            # uploaded_file = st.file_uploader("Choisissez un fichier audio", type=["wav", "mp3"])
+
+            if audio_file is not None:
+                # Load and resample the audio
+                audio, sr = librosa.load(audio_file, sr=RATE)
+                # chunk = audio_file
+
+                # Sliding-window parameters
+                window_size = 1  # 1 second of data
+                hop_length = 0.5  # 0.5 seconds of overlap
+
+                # Create a live-updating chart
+                fig, ax = plt.subplots()
+                lines = [ax.plot([], [], label=emotion)[0] for emotion in emotion_labels]
+                ax.set_ylim(0, 1)
+                ax.set_xlim(0, len(audio) / sr)
+                ax.set_xlabel("Temps (s)")
+                ax.set_ylabel("Probabilité")
+
+                chart = st.pyplot(fig)
+
+                scores = [[], [], []]  # 3 emotions for now
+
+                # Sliding-window processing
+                for i in range(0, len(audio), int(hop_length * sr)):
+                    chunk = audio[i:i + int(window_size * sr)]
+                    if len(chunk) < int(window_size * sr):
+                        break
+
+                    emotion_scores = predict_emotion(chunk, output_probs=True, sampling_rate=RATE)
+
+                    # Update the chart
+                    for emotion, line in zip(emotion_labels, lines):
+                        xdata = list(line.get_xdata())
+                        ydata = list(line.get_ydata())
+                        colour = colors[list(emotion_scores).index(emotion)]
+                        xdata.append(i / sr)
+                        ydata.append(emotion_scores[emotion])
+                        scores[list(emotion_scores).index(emotion)].append(emotion_scores[emotion])
+                        line.set_data(xdata, ydata)
+                        line.set_color(colour)
+
+                    ax.relim()
+                    ax.autoscale_view()
+                    ax.legend()
+                    chart.pyplot(fig, use_container_width=True)
+
+                # Prepare the styling
+                st.markdown("""
+                <style>
+                .colored-box {
+                    padding: 10px;
+                    border-radius: 5px;
+                    color: white;
+                    font-weight: bold;
+                    text-align: center;
+                }
+                </style>
+                """
+                , unsafe_allow_html=True)
+
+                # Dynamically create the specified number of columns
+                columns = st.columns(len(emotion_scores))
+
+                # emotion_scores_mean = [sum(sublist) / len(sublist) for sublist in scores]
+                emotion_scores_mean = {emotion: sum(sublist) / len(sublist) for emotion, sublist in zip(emotion_labels, scores)}
+                max_emo = max(emotion_scores_mean)
+                emotion_scores_sorted = dict(sorted(emotion_scores_mean.items(), key=lambda x: x[1], reverse=True))
+                colors_sorted = [colors[list(emotion_scores_mean.keys()).index(key)] for key in list(emotion_scores_sorted.keys())]
+
+                # Add content to each column
+                for i, (col, emotion) in enumerate(zip(columns, emotion_scores_sorted)):
+                    color = colors_sorted[i % len(colors_sorted)]  # Cycle through colors if more columns than colors
+                    col.markdown(f"""
+                    <div class="colored-box" style="background-color: {color};">
+                        {emotion} : {100*emotion_scores_sorted[emotion]:.2f} %
+                    </div>
+                    """
+                    , unsafe_allow_html=True)
+
+
+
+                st.success("Analyse terminée !")
+            else:
+                st.warning("You need to load an audio file !")
+
+    if start_inference:
+
+        st.subheader("Feedback")
+
+        # Initialize the CSV file
+        csv_file = os.path.join("src", "predictions", "feedback.csv")
+
+        # Create the CSV file with the appropriate columns if it does not exist
+        if not os.path.exists(csv_file):
+            df = pd.DataFrame(columns=["filepath", "prediction", "feedback"])
+            df.to_csv(csv_file, index=False)
+
+        # Load the existing CSV data
+        df = pd.read_csv(csv_file)
+
+        with st.form("feedback_form"):
+            st.write("What should have been the correct prediction ? (*Choose the same emotion if the prediction was correct*).")
+            feedback = st.selectbox("Your answer :", ['Sadness', 'Anger', 'Disgust', 'Fear', 'Surprise', 'Joy', 'Neutral'])
+            submit_button = st.form_submit_button("Submit")
+            st.write("En cliquant sur ce bouton, vous acceptez que votre audio soit sauvegardé dans notre base de données.")
+
+            if submit_button:
+                # Append the feedback to the DataFrame
+                new_entry = pd.DataFrame([{"filepath": audio_file.name, "prediction": max_emo, "feedback": feedback}])
+                # df = df.append(new_entry, ignore_index=True)
+                df = pd.concat([df, new_entry], ignore_index=True)
+
+                # Save the updated data back to the CSV file
+                df.to_csv(csv_file, index=False)
+
+                # Save the audio file
+                with open(os.path.join("src", "predictions", "data", audio_file.name), "wb") as f:
+                    f.write(audio_file.getbuffer())
+
+                # Confirm to the user
+                st.success("Merci pour votre retour ! Vos données ont été sauvegardées.")
views/real_time.py CHANGED
@@ -10,12 +10,6 @@ from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
 import matplotlib.pyplot as plt
 import numpy as np
 import time
-from predict import predict_emotion
-
-# Load the Wav2Vec2 model for emotion classification
-model_name = "superb/wav2vec2-base-superb-er"  # Example model for emotion recognition
-processor = Wav2Vec2Processor.from_pretrained(model_name)
-model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
 
 # Audio parameters
 CHUNK = 1024
@@ -23,15 +17,6 @@ FORMAT = pyaudio.paInt16
 CHANNELS = 1
 RATE = 16000
 
-# Function to predict the emotion from an audio segment
-# def predict_emotion(audio_data):
-#     inputs = processor(audio_data, sampling_rate=RATE, return_tensors="pt", padding=True)
-#     with torch.no_grad():
-#         logits = model(**inputs).logits
-#     predicted_id = torch.argmax(logits, dim=-1).item()
-#     emotion = model.config.id2label[predicted_id]
-#     return emotion
-
 # Streamlit interface
 st.title("Détection des émotions en temps réel")
 
views/studio.py ADDED
@@ -0,0 +1,191 @@
+import streamlit as st
+from st_audiorec import st_audiorec
+
+# from src.model.transcriber import transcribe_audio
+
+
+def studio():
+    st.title("SISE ultimate challenge")
+    st.write("C'est le dernier challenge de la formation SISE.")
+    st.markdown("""
+    **Overview:**
+    - Analyse de logs
+    - Analyse de données
+    - Machine learning
+    """)
+
+    st.markdown("---")
+
+    st.header("🎧 Audio File Studio")
+
+    tab1, tab2, tab3 = st.tabs(["⬆️ Record Audio", "🔈 Realtime Audio", "📝 Transcription"])
+
+    with tab1:
+        st.header("⬆️ Upload Audio Record")
+        st.write("Here you can upload a pre-recorded audio.")
+        audio_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "ogg"])
+
+        if "audio_file" not in st.session_state:
+            st.session_state.audio_file = None
+
+        if audio_file is not None:
+            st.success("Audio file uploaded successfully !")
+            st.session_state.audio_file = audio_file
+
+            # with open(os.path.join(DIRECTORY, FILE_NAME), "wb") as f:
+            #     f.write(audio_file.getbuffer())
+            #     st.success(f"Saved file: {FILE_NAME}")
+
+
+
+    with tab2:
+        st.header("🔈 Realtime Audio Record")
+        st.write("Here you can record an audio.")
+
+        if "audio_file" not in st.session_state:
+            st.session_state.audio_file = None
+
+        audio_file = st_audiorec()
+
+        if audio_file is not None:
+            st.audio(audio_file, format='audio/wav')
+            st.success("Audio recorded successfully !")
+            st.session_state.audio_file = audio_file
+
+        # Buttons to start and stop recording
+        # start_button = st.button("Démarrer l'enregistrement")
+        # stop_button = st.button("Arrêter l'enregistrement")
+        # start_stop = st.button("Démarrer/Arrêter l'enregistrement")
+
+
+        # Real-time emotion display area
+        # emotion_placeholder = st.empty()
+        # final_emotion_placeholder = st.empty()
+        # audio = pyaudio.PyAudio()
+        # audio_buffer = np.array([])
+        # emotion_prediction = "Aucune prédiction"
+        # is_recording = False
+
+        # if start_stop:
+        #     is_recording = not is_recording
+
+        # # Global variables for sharing data between threads
+        # def audio_callback(in_data, frame_count, time_info, status):
+        #     global audio_buffer
+        #     audio_data = np.frombuffer(in_data, dtype=np.float32)
+        #     audio_buffer = np.concatenate((audio_buffer, audio_data))
+        #     return (in_data, pyaudio.paContinue)
+
+        # def predict_emotion_thread():
+        #     global audio_buffer, emotion_prediction
+        #     while is_recording:
+        #         if len(audio_buffer) >= CHUNK:
+        #             chunk = audio_buffer[:CHUNK]
+        #             audio_buffer = audio_buffer[STRIDE:]
+        #             emotion_prediction = predict_emotion(chunk, output_probs=False, sampling_rate=RATE)  # Use your model here
+        #             # time.sleep(0.1)
+
+        # if is_recording:
+        #     audio_buffer = np.array([])
+        #     stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True,
+        #                         frames_per_buffer=CHUNK, stream_callback=audio_callback)
+        #     stream.start_stream()
+        #     threading.Thread(target=predict_emotion_thread, daemon=True).start()
+        #     st.write("Enregistrement en cours...")
+        # else:
+        #     stream.stop_stream()
+        #     stream.close()
+        #     st.write("Enregistrement arrêté.")
+
+        # emotion_display = st.empty()
+
+        # while is_recording:
+        #     emotion_display.write(f"Émotion détectée : {emotion_prediction}")
+        #     # time.sleep(0.1)
+
+        # audio.terminate()
+
+
+
+        # stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
+
+        # frames = []
+        # real_time_emotions = []
+
+        # while not stop_button:
+        #     data = stream.read(CHUNK)
+        #     frames.append(data)
+
+        #     # Real-time processing (in 1-second slices)
+        #     if len(frames) >= RATE // CHUNK:
+        #         audio_segment = np.frombuffer(b''.join(frames[-(RATE // CHUNK):]), dtype=np.int16)
+        #         emotion = predict_emotion(audio_segment, output_probs=False, sampling_rate=RATE)
+        #         real_time_emotions.append(emotion)
+        #         emotion_placeholder.line_chart(real_time_emotions)  # Graphical display of the emotions
+
+        # # Stop recording
+        # stream.stop_stream()
+        # stream.close()
+        # audio.terminate()
+
+        # # Save the recorded audio
+        # wf = wave.open("output.wav", "wb")
+        # wf.setnchannels(CHANNELS)
+        # wf.setsampwidth(audio.get_sample_size(FORMAT))
+        # wf.setframerate(RATE)
+        # wf.writeframes(b"".join(frames))
+        # wf.close()
+
+        # # Final prediction on the full recorded audio
+        # full_audio_data = np.frombuffer(b''.join(frames), dtype=np.int16)
+        # final_emotion = predict_emotion(full_audio_data)
+
+        # final_emotion_placeholder.write(f"Émotion finale prédite : {final_emotion}")
+
+
+
+    with tab3:
+        st.header("📝 Speech2Text Transcription")
+        st.write("Here you can get the audio transcript.")
+
+        save = st.checkbox("Save transcription to .txt", value=False, key="save-transcript")
+
+        ############################# Uncomment once this is debugged
+        if st.button("Transcribe", key="transcribe-button"):
+            # # Function to transcribe the audio
+            # transcription = transcribe_audio(st.audio)
+
+            # # Load and transcribe the audio
+            # # audio, rate = load_audio(audio_file_path)  # (re)load the audio if needed
+            # transcription = transcribe_audio(audio_file, sampling_rate=16000)
+
+            # # Display the transcription
+            # st.write("Transcription :", transcription)
+
+            st.success("Audio registered successfully.")
+            # if save:
+            #     file_path = "transcript.txt"
+
+            #     # Write the text to the file
+            #     with open(file_path, "w") as file:
+            #         file.write(transcription)
+
+            #     st.success(f"Text saved to {file_path}")
+
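The two new views communicate through `st.session_state.audio_file`: studio() writes it on upload or recording, and emotion_analysis() reads it. Note that emotion_analysis() dereferences `st.session_state.audio_file` directly, which raises an error if the Studio page has never initialized the key; a defensive read is sketched below (hypothetical hardening, not in the commit):

    import streamlit as st

    # Consumer side (emotion_analysis): read the shared upload defensively.
    audio_file = st.session_state.get("audio_file")  # None if studio() never ran
    if audio_file is None:
        st.info("Please, upload or record an audio file in the studio tab")
        st.stop()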