jdalfonso committed
Commit 201ed31 · 1 Parent(s): 6855218

:rocket: feature new interface

.gitignore CHANGED
@@ -180,7 +180,7 @@ old/
  *.wav
  data/*
  *.pth
-
+ old/
  # Mac
  .DS_Store
  .idea
.streamlit/config.toml CHANGED
@@ -1,4 +1,4 @@
  [theme]
- base="dark"
+ base="light"
  primaryColor="#7c99b4"

app.py CHANGED
@@ -1,45 +1,248 @@
  import streamlit as st
- from streamlit_option_menu import option_menu
- from views.studio import studio
- from views.emotion_analysis import emotion_analysis
- from views.about import about
+ import pandas as pd
+ import numpy as np
  import os
- import sys
-
- sys.path.append(os.path.abspath("src"))
- sys.path.append(os.path.abspath("."))
-
- if "model_loaded" not in st.session_state:
-     st.session_state.model_loaded = None
-
- # Set the logo
- st.sidebar.image("img/logo.png", use_container_width=True)
-
- # Create a sidebar with navigation options
- # Sidebar navigation with streamlit-option-menu
- with st.sidebar:
-     # st.image("img/logo.png", use_container_width=True)
-     # st.markdown("<h1 style='text-align: center;'>SecureIA Dashboard</h1>", unsafe_allow_html=True)
-     # Navigation menu with icons
-     selected_tab = option_menu(
-         menu_title=None,  # Added menu_title parameter
-         options=["Studio", "Emotion Analysis", "About"],
-         icons=["record-circle", "robot", "info-circle"],
-         menu_icon="cast",
-         default_index=0,
-         # styles={
-         # "container": {"padding": "5px", "background-color": "#f0f2f6"},
-         # "icon": {"color": "orange", "font-size": "18px"},
-         # "nav-link": {"font-size": "16px", "text-align": "left", "margin": "0px", "color": "black"},
-         # "nav-link-selected": {"background-color": "#4CAF50", "color": "white"},
-         # }
+ import time
+ import matplotlib.pyplot as plt
+ from datetime import datetime
+ import tempfile
+ import io
+ import json
+ from model.transcriber import transcribe_audio
+ from predict import predict_emotion
+
+ # You'll need to install this package:
+ # pip install streamlit-audiorec
+ from st_audiorec import st_audiorec
+
+ # Page configuration
+ st.set_page_config(
+     page_title="Emotion Analyser",
+     page_icon="🎤",
+     layout="wide"
+ )
+
+ # Initialize session state variables if they don't exist
+ if 'audio_data' not in st.session_state:
+     st.session_state.audio_data = []
+ if 'current_audio_index' not in st.session_state:
+     st.session_state.current_audio_index = -1
+ if 'audio_history_csv' not in st.session_state:
+     # Define columns for our CSV storage
+     st.session_state.audio_history_csv = pd.DataFrame(
+         columns=['timestamp', 'file_path', 'transcription', 'emotion', 'probabilities']
+     )
+ if 'needs_rerun' not in st.session_state:
+     st.session_state.needs_rerun = False
+
+ # Function to ensure we keep only the last 10 entries
+ def update_audio_history(new_entry):
+     # Add the new entry
+     st.session_state.audio_history_csv = pd.concat([st.session_state.audio_history_csv, pd.DataFrame([new_entry])], ignore_index=True)
+
+     # Keep only the last 10 entries
+     if len(st.session_state.audio_history_csv) > 10:
+         st.session_state.audio_history_csv = st.session_state.audio_history_csv.iloc[-10:]
+
+     # Save to CSV
+     st.session_state.audio_history_csv.to_csv('audio_history.csv', index=False)
+
+ # Function to process audio and get results
+ def process_audio(audio_path):
+     try:
+         # Get transcription
+         transcription = transcribe_audio(audio_path)
+
+         # Get emotion prediction
+         predicted_emotion, probabilities = predict_emotion(audio_path)
+
+         # Update audio history
+         new_entry = {
+             'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+             'file_path': audio_path,
+             'transcription': transcription,
+             'emotion': predicted_emotion,
+             'probabilities': str(probabilities)  # Convert dict to string for storage
+         }
+         update_audio_history(new_entry)
+
+         # Update current index
+         st.session_state.current_audio_index = len(st.session_state.audio_history_csv) - 1
+
+         return transcription, predicted_emotion, probabilities
+     except Exception as e:
+         st.error(f"Error processing audio: {str(e)}")
+         return None, None, None
+
+ # Function to split audio into 10-second segments
+ def split_audio(audio_file, segment_length=10):
+     # This is a placeholder - in a real implementation, you'd use a library like pydub
+     # to split the audio file into segments
+     st.warning("Audio splitting functionality is a placeholder. Implement with pydub or similar library.")
+     # For now, we'll just return the whole file as a single segment
+     return [audio_file]
+
+ # Function to display emotion visualization
+ def display_emotion_chart(probabilities):
+     emotions = list(probabilities.keys())
+     values = list(probabilities.values())
+
+     fig, ax = plt.subplots(figsize=(10, 5))
+     bars = ax.bar(emotions, values, color=['red', 'gray', 'green'])
+
+     # Add data labels on top of bars
+     for bar in bars:
+         height = bar.get_height()
+         ax.text(bar.get_x() + bar.get_width()/2., height + 0.02,
+                 f'{height:.2f}', ha='center', va='bottom')
+
+     ax.set_ylim(0, 1.1)
+     ax.set_ylabel('Probability')
+     ax.set_title('Emotion Prediction Results')
+
+     st.pyplot(fig)
+
+ # Trigger rerun if needed (replaces experimental_rerun)
+ if st.session_state.needs_rerun:
+     st.session_state.needs_rerun = False
+     st.rerun()  # Using st.rerun() instead of experimental_rerun
+
+ # Main App Layout
+ st.image("./img/logo_01.png", width=400)
+
+ # Create two columns for the main layout
+ col1, col2 = st.columns([1, 1])
+
+ with col1:
+     st.header("Audio Input")
+
+     # Method selection
+
+     tab1, tab2 = st.tabs(["Record Audio", "Upload Audio"])
+
+     with tab1:
+         st.write("Record your audio (max 10 seconds):")
+
+         # Using streamlit-audiorec for better recording functionality
+         wav_audio_data = st_audiorec()
+
+         if wav_audio_data is not None:
+             # Save the recorded audio to a temporary file
+             with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
+                 tmp_file.write(wav_audio_data)
+                 tmp_file_path = tmp_file.name
+
+             st.success("Audio recorded successfully!")
+
+             # Process button
+             if st.button("Process Recorded Audio"):
+                 # Process the audio
+                 with st.spinner("Processing audio..."):
+                     transcription, emotion, probs = process_audio(tmp_file_path)
+                     # Set flag for rerun instead of calling experimental_rerun
+                     if transcription is not None:
+                         st.success("Audio processed successfully!")
+                         st.session_state.needs_rerun = True
+
+     with tab2:
+         uploaded_file = st.file_uploader("Upload an audio file (WAV format)", type=['wav'])
+
+         if uploaded_file is not None:
+             # Save the uploaded file to a temporary location
+             with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
+                 tmp_file.write(uploaded_file.getbuffer())
+                 tmp_file_path = tmp_file.name
+
+             st.audio(uploaded_file, format="audio/wav")
+
+             # Process button
+             if st.button("Process Uploaded Audio"):
+                 # Split audio into 10-second segments
+                 with st.spinner("Processing audio..."):
+                     segments = split_audio(tmp_file_path)
+
+                     # Process each segment
+                     for i, segment_path in enumerate(segments):
+                         st.write(f"Processing segment {i+1}...")
+                         transcription, emotion, probs = process_audio(segment_path)
+
+                     # Set flag for rerun instead of calling experimental_rerun
+                     st.success("Audio processed successfully!")
+                     st.session_state.needs_rerun = True
+
+ with col2:
+     st.header("Results")
+
+     # Display results if available
+     if st.session_state.current_audio_index >= 0 and len(st.session_state.audio_history_csv) > 0:
+         current_data = st.session_state.audio_history_csv.iloc[st.session_state.current_audio_index]
+
+         # Transcription
+         st.subheader("Transcription")
+         st.text_area("", value=current_data['transcription'], height=100, key="transcription_area")
+
+         # Emotion
+         st.subheader("Detected Emotion")
+         st.info(f"🎭 Predicted emotion: **{current_data['emotion']}**")
+
+         # Convert string representation of dict back to actual dict
+         try:
+             import ast
+             probs = ast.literal_eval(current_data['probabilities'])
+             display_emotion_chart(probs)
+         except Exception as e:
+             st.error(f"Error parsing probabilities: {str(e)}")
+             st.write(f"Raw probabilities: {current_data['probabilities']}")
+     else:
+         st.info("Record or upload audio to see results")
+
+ # Audio History and Analytics Section
+ st.header("Audio History and Analytics")
+
+ if len(st.session_state.audio_history_csv) > 0:
+     # Display a select box to choose from audio history
+     timestamps = st.session_state.audio_history_csv['timestamp'].tolist()
+     selected_timestamp = st.selectbox(
+         "Select audio from history:",
+         options=timestamps,
+         index=len(timestamps) - 1  # Default to most recent
      )

+     # Update current index when selection changes
+     selected_index = st.session_state.audio_history_csv[
+         st.session_state.audio_history_csv['timestamp'] == selected_timestamp
+     ].index[0]
+
+     # Only update if different
+     if st.session_state.current_audio_index != selected_index:
+         st.session_state.current_audio_index = selected_index
+         st.session_state.needs_rerun = True

- if selected_tab == "Studio":
-     studio()
- elif selected_tab == "Emotion Analysis":
-     emotion_analysis()
- elif selected_tab == "About":
-     about()
-
+     # Analytics button
+     if st.button("Run Analytics on Selected Audio"):
+         st.subheader("Analytics Results")
+
+         # Get the selected audio data
+         selected_data = st.session_state.audio_history_csv.iloc[selected_index]
+
+         # Display analytics (this is where you would add more sophisticated analytics)
+         st.write(f"Selected Audio: {selected_data['timestamp']}")
+         st.write(f"Emotion: {selected_data['emotion']}")
+         st.write(f"File Path: {selected_data['file_path']}")
+
+         # Add any additional analytics you want here
+
+         # Try to play the selected audio
+         try:
+             if os.path.exists(selected_data['file_path']):
+                 st.audio(selected_data['file_path'], format="audio/wav")
+             else:
+                 st.warning("Audio file not found - it may have been deleted or moved.")
+         except Exception as e:
+             st.error(f"Error playing audio: {str(e)}")
+ else:
+     st.info("No audio history available. Record or upload audio to create history.")
+
+ # Footer
+ st.markdown("---")
+ st.caption("Audio Emotion Analyzer - Processes audio in 10-second segments and predicts emotions")
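
Note: the split_audio placeholder in the new app.py defers the real segmentation to pydub. A minimal sketch of that idea, assuming pydub and its ffmpeg backend are installed; the helper name and temporary-file handling are illustrative and not part of this commit:

from pydub import AudioSegment
import tempfile

def split_audio_pydub(audio_path, segment_length=10):
    # Illustrative sketch: cut a WAV file into consecutive segment_length-second
    # pieces and return the paths of the exported chunks.
    audio = AudioSegment.from_wav(audio_path)
    step_ms = segment_length * 1000
    segment_paths = []
    for start_ms in range(0, len(audio), step_ms):
        chunk = audio[start_ms:start_ms + step_ms]
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
        chunk.export(tmp.name, format="wav")
        segment_paths.append(tmp.name)
    return segment_paths

Each returned path could then be passed to process_audio, exactly as the upload tab already does for its single segment.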
src/config.py → config.py RENAMED
@@ -21,5 +21,5 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
  MODEL_NAME = "facebook/wav2vec2-large-xlsr-53-french"

  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
- BEST_MODEL_NAME = os.path.join(BASE_DIR, "..", "best_model.pth")  # Go up one level to the project root
+ BEST_MODEL_NAME = os.path.join(BASE_DIR, "model", "fr-speech-emotion-model.pth")  # Checkpoint now lives in the model/ folder

img copy/logo_01.png ADDED
__init__.py → model/__init__.py RENAMED
File without changes
{src/model → model}/emotion_classifier.py RENAMED
File without changes
{src/model → model}/feature_extractor.py RENAMED
@@ -1,6 +1,6 @@
  import torch
  from transformers import Wav2Vec2Model, Wav2Vec2Processor
- from src.config import MODEL_NAME, DEVICE
+ from config import MODEL_NAME, DEVICE

  processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
  feature_extractor = Wav2Vec2Model.from_pretrained(MODEL_NAME).to(DEVICE)
{src/model → model}/transcriber.py RENAMED
File without changes
src/predict.py → predict.py RENAMED
@@ -4,8 +4,8 @@ import torch
  import librosa
  import numpy as np
  from model.emotion_classifier import EmotionClassifier
- from src.utils.preprocessing import collate_fn
- from src.config import DEVICE, NUM_LABELS, BEST_MODEL_NAME
+ from utils.preprocessing import collate_fn
+ from config import DEVICE, NUM_LABELS, BEST_MODEL_NAME

  # Load the trained model
  feature_dim = 40  # Number of MFCCs used
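
Note: dropping the src. prefix from these imports assumes that config.py, model/ and utils/ now sit at the repository root and that the root is on the module search path when the app starts (the old app.py added it to sys.path explicitly). A small, hypothetical guard that makes that assumption explicit:

import os
import sys

# Hypothetical helper, not part of the commit: make sure the repository root
# (the folder containing config.py, model/ and utils/) is importable even when
# the script is launched from another working directory.
REPO_ROOT = os.path.dirname(os.path.abspath(__file__))
if REPO_ROOT not in sys.path:
    sys.path.insert(0, REPO_ROOT)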
src/__init__.py DELETED
File without changes
src/data/dataset.csv DELETED
@@ -1,113 +0,0 @@
1
- dossier,emotion
2
- colere/c1ac.wav,colere
3
- colere/c1af.wav,colere
4
- colere/c1aj.wav,colere
5
- colere/c1an.wav,colere
6
- colere/c1bc.wav,colere
7
- colere/c1bf.wav,colere
8
- colere/c1bj.wav,colere
9
- colere/c1bn.wav,colere
10
- colere/c1cc.wav,colere
11
- colere/c1cf.wav,colere
12
- colere/c1cj.wav,colere
13
- colere/c2ac.wav,colere
14
- colere/c2af.wav,colere
15
- colere/c2aj.wav,colere
16
- colere/c2an.wav,colere
17
- colere/c2bc.wav,colere
18
- colere/c2bf.wav,colere
19
- colere/c2bj.wav,colere
20
- colere/c2bn.wav,colere
21
- colere/c2cn.wav,colere
22
- colere/c3ac.wav,colere
23
- colere/c3af.wav,colere
24
- colere/c3aj.wav,colere
25
- colere/c3an.wav,colere
26
- colere/c3bc.wav,colere
27
- colere/c3bf.wav,colere
28
- colere/c3bj.wav,colere
29
- colere/c3bn.wav,colere
30
- colere/c4aaf.wav,colere
31
- colere/c4ac.wav,colere
32
- colere/c4af.wav,colere
33
- colere/c4aj.wav,colere
34
- colere/c4an.wav,colere
35
- colere/c4bc.wav,colere
36
- colere/c4bj.wav,colere
37
- colere/c4bn.wav,colere
38
- colere/c5an.wav,colere
39
- colere/c5c.wav,colere
40
- colere/c5f.wav,colere
41
- colere/c5j.wav,colere
42
- neutre/n1ac.wav,neutre
43
- neutre/n1af.wav,neutre
44
- neutre/n1aj.wav,neutre
45
- neutre/n1an.wav,neutre
46
- neutre/n1bc.wav,neutre
47
- neutre/n1bf.wav,neutre
48
- neutre/n1bj.wav,neutre
49
- neutre/n1bn.wav,neutre
50
- neutre/n2ac.wav,neutre
51
- neutre/n2af.wav,neutre
52
- neutre/n2aj.wav,neutre
53
- neutre/n2an.wav,neutre
54
- neutre/n2bc.wav,neutre
55
- neutre/n2bf.wav,neutre
56
- neutre/n2bj.wav,neutre
57
- neutre/n2bn.wav,neutre
58
- neutre/n3ac.wav,neutre
59
- neutre/n3af.wav,neutre
60
- neutre/n3aj.wav,neutre
61
- neutre/n3an.wav,neutre
62
- neutre/n3bc.wav,neutre
63
- neutre/n3bf.wav,neutre
64
- neutre/n3bj.wav,neutre
65
- neutre/n3bn.wav,neutre
66
- neutre/n4ac.wav,neutre
67
- neutre/n4aj.wav,neutre
68
- neutre/n4an.wav,neutre
69
- neutre/n4f.wav,neutre
70
- neutre/n5ac.wav,neutre
71
- neutre/n5af.wav,neutre
72
- neutre/n5aj.wav,neutre
73
- neutre/n5an.wav,neutre
74
- neutre/n5bc.wav,neutre
75
- neutre/n5bf.wav,neutre
76
- neutre/n5bj.wav,neutre
77
- neutre/n5bn.wav,neutre
78
- joie/h1ac.wav,joie
79
- joie/h1af.wav,joie
80
- joie/h1aj.wav,joie
81
- joie/h1an.wav,joie
82
- joie/h1bc.wav,joie
83
- joie/h1bf.wav,joie
84
- joie/h1bj.wav,joie
85
- joie/h1bn.wav,joie
86
- joie/h21f.wav,joie
87
- joie/h2ac.wav,joie
88
- joie/h2aj.wav,joie
89
- joie/h2an.wav,joie
90
- joie/h2bc.wav,joie
91
- joie/h2bf.wav,joie
92
- joie/h2bj.wav,joie
93
- joie/h2bn.wav,joie
94
- joie/h3ac.wav,joie
95
- joie/h3af.wav,joie
96
- joie/h3aj.wav,joie
97
- joie/h3anwav.wav,joie
98
- joie/h3bc.wav,joie
99
- joie/h3bf.wav,joie
100
- joie/h3bj.wav,joie
101
- joie/h3bn.wav,joie
102
- joie/h4ac.wav,joie
103
- joie/h4af.wav,joie
104
- joie/h4aj.wav,joie
105
- joie/h4an.wav,joie
106
- joie/h4bc.wav,joie
107
- joie/h4bf.wav,joie
108
- joie/h4bj.wav,joie
109
- joie/h4bn.wav,joie
110
- joie/h5an.wav,joie
111
- joie/h5c.wav,joie
112
- joie/h5f.wav,joie
113
- joie/h5j.wav,joie
src/data/processing.ipynb DELETED
@@ -1,113 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 2,
6
- "metadata": {},
7
- "outputs": [
8
- {
9
- "name": "stdout",
10
- "output_type": "stream",
11
- "text": [
12
- "<class 'pandas.core.frame.DataFrame'>\n",
13
- "RangeIndex: 112 entries, 0 to 111\n",
14
- "Data columns (total 2 columns):\n",
15
- " # Column Non-Null Count Dtype \n",
16
- "--- ------ -------------- ----- \n",
17
- " 0 dossier 112 non-null object\n",
18
- " 1 emotion 112 non-null object\n",
19
- "dtypes: object(2)\n",
20
- "memory usage: 1.9+ KB\n"
21
- ]
22
- }
23
- ],
24
- "source": [
25
- "import pandas as pd\n",
26
- "\n",
27
- "data = pd.read_csv('dataset.csv', sep=',', header=0)\n",
28
- "\n",
29
- "data.info()\n",
30
- "\n"
31
- ]
32
- },
33
- {
34
- "cell_type": "code",
35
- "execution_count": 3,
36
- "metadata": {},
37
- "outputs": [
38
- {
39
- "name": "stdout",
40
- "output_type": "stream",
41
- "text": [
42
- " dossier emotion\n",
43
- "0 colere/c1ac.wav colere\n",
44
- "1 colere/c1af.wav colere\n",
45
- "2 colere/c1aj.wav colere\n",
46
- "3 colere/c1an.wav colere\n",
47
- "4 colere/c1bc.wav colere\n",
48
- ".. ... ...\n",
49
- "107 joie/h4bn.wav joie\n",
50
- "108 joie/h5an.wav joie\n",
51
- "109 joie/h5c.wav joie\n",
52
- "110 joie/h5f.wav joie\n",
53
- "111 joie/h5j.wav joie\n",
54
- "\n",
55
- "[112 rows x 2 columns]\n"
56
- ]
57
- },
58
- {
59
- "name": "stderr",
60
- "output_type": "stream",
61
- "text": [
62
- "C:\\Users\\Evidya\\AppData\\Local\\Temp\\ipykernel_24704\\3726049179.py:8: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n",
63
- " data = data.applymap(lambda x: x.strip() if isinstance(x, str) else x)\n"
64
- ]
65
- }
66
- ],
67
- "source": [
68
- "import pandas as pd\n",
69
- "\n",
70
- "\n",
71
- "# 🔹 1. Supprimer les espaces des noms de colonnes\n",
72
- "data.columns = data.columns.str.strip()\n",
73
- "\n",
74
- "# 🔹 2. Supprimer les espaces dans toutes les cellules (colonnes object)\n",
75
- "data = data.applymap(lambda x: x.strip() if isinstance(x, str) else x)\n",
76
- "\n",
77
- "# Afficher le DataFrame corrigé\n",
78
- "print(data)\n"
79
- ]
80
- },
81
- {
82
- "cell_type": "code",
83
- "execution_count": 4,
84
- "metadata": {},
85
- "outputs": [],
86
- "source": [
87
- "# save to csv\n",
88
- "data.to_csv('dataset.csv', index=False, sep=',')"
89
- ]
90
- }
91
- ],
92
- "metadata": {
93
- "kernelspec": {
94
- "display_name": ".venv",
95
- "language": "python",
96
- "name": "python3"
97
- },
98
- "language_info": {
99
- "codemirror_mode": {
100
- "name": "ipython",
101
- "version": 3
102
- },
103
- "file_extension": ".py",
104
- "mimetype": "text/x-python",
105
- "name": "python",
106
- "nbconvert_exporter": "python",
107
- "pygments_lexer": "ipython3",
108
- "version": "3.11.5"
109
- }
110
- },
111
- "nbformat": 4,
112
- "nbformat_minor": 2
113
- }
src/model/__init__.py DELETED
File without changes
src/predictions/feedback.csv DELETED
@@ -1 +0,0 @@
- filepath,prediction,feedback
src/test_backend.ipynb DELETED
@@ -1,63 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 5,
6
- "metadata": {},
7
- "outputs": [
8
- {
9
- "name": "stdout",
10
- "output_type": "stream",
11
- "text": [
12
- "Transcription : tu as encore oublié de faire le dossier c'était hurgent nom de chien\n"
13
- ]
14
- }
15
- ],
16
- "source": [
17
- "# make a transcription from audio file\n",
18
- "from model.transcriber import transcribe_audio\n",
19
- "import os\n",
20
- "\n",
21
- "base_path = os.path.abspath(os.path.join(\"data\"))\n",
22
- "audio_path = os.path.join(base_path, \"colere\", \"c1af.wav\") # path to audio file\n",
23
- "texte = transcribe_audio(audio_path)\n",
24
- "print(f\"Transcription : {texte}\")"
25
- ]
26
- },
27
- {
28
- "cell_type": "code",
29
- "execution_count": null,
30
- "metadata": {},
31
- "outputs": [],
32
- "source": [
33
- "from predict import predict_emotion\n",
34
- "\n",
35
- "base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), \"data\"))\n",
36
- "audio_file = os.path.join(base_path, \"colere\", \"c1ac.wav\")\n",
37
- "emotion = predict_emotion(audio_file)\n",
38
- "print(f\"🎤 L'émotion prédite est : {emotion}\")"
39
- ]
40
- }
41
- ],
42
- "metadata": {
43
- "kernelspec": {
44
- "display_name": ".venv",
45
- "language": "python",
46
- "name": "python3"
47
- },
48
- "language_info": {
49
- "codemirror_mode": {
50
- "name": "ipython",
51
- "version": 3
52
- },
53
- "file_extension": ".py",
54
- "mimetype": "text/x-python",
55
- "name": "python",
56
- "nbconvert_exporter": "python",
57
- "pygments_lexer": "ipython3",
58
- "version": "3.11.5"
59
- }
60
- },
61
- "nbformat": 4,
62
- "nbformat_minor": 2
63
- }
test_speech.py ADDED
@@ -0,0 +1,49 @@
+ import torch
+ import torchaudio
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
+ import os
+
+ # 🔹 Parameters
+ MODEL_NAME = "./wav2vec2_emotion"  # Path to the saved model
+ LABELS = ["colere", "joie", "neutre"]  # The classes
+
+ # 🔹 Load the processor and the model
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
+ model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_NAME).to(device)
+ model.eval()  # Evaluation mode
+
+
+ def predict_emotion(audio_path):
+     # Load the audio
+     waveform, sample_rate = torchaudio.load(audio_path)
+
+     # Preprocess the audio
+     inputs = processor(
+         waveform.squeeze().numpy(),
+         sampling_rate=sample_rate,
+         return_tensors="pt",
+         padding=True,
+         truncation=True,
+         max_length=32000  # Adjust to the duration of your files
+     )
+
+     # Send the data to the right device (CPU or GPU)
+     input_values = inputs["input_values"].to(device)
+
+     # Prediction
+     with torch.no_grad():
+         logits = model(input_values).logits
+
+     # Find the predicted emotion
+     predicted_class = torch.argmax(logits, dim=-1).item()
+
+     return LABELS[predicted_class]  # Return the corresponding label
+
+ base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
+ audio_file = os.path.join(base_path, "colere", "c1ac.wav")
+ predicted_emotion = predict_emotion(audio_file)
+ print(f"🎙️ Émotion prédite : {predicted_emotion}")
+
+
+
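
Note: test_speech.py hands the file's native sampling rate straight to the processor, whereas utils/preprocessing.py in this same commit resamples audio to 16 kHz before it reaches wav2vec2. A short sketch of applying the same resampling here, assuming the recordings are not already at 16 kHz (the helper name is illustrative, not part of the commit):

import torchaudio

TARGET_SR = 16_000  # the wav2vec2 checkpoints used in this repo expect 16 kHz input

def load_waveform_16k(audio_path):
    # Load an audio file and resample it to 16 kHz if needed.
    waveform, sample_rate = torchaudio.load(audio_path)
    if sample_rate != TARGET_SR:
        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=TARGET_SR)
        waveform = resampler(waveform)
    return waveform

The resampled waveform can then be passed to the processor with sampling_rate=TARGET_SR.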
src/train.py → train.py RENAMED
File without changes
train_speech.py ADDED
@@ -0,0 +1,88 @@
+ import torch
+ import torchaudio
+ import os
+ from datasets import Dataset, DatasetDict
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification, TrainingArguments, Trainer
+
+ # 🔹 Parameters
+ MODEL_NAME = "facebook/wav2vec2-large-xlsr-53-french"
+ NUM_LABELS = 3  # Number of emotion classes
+ BATCH_SIZE = 8
+ EPOCHS = 10
+ LEARNING_RATE = 1e-4
+ MAX_LENGTH = 32000  # Adjust to the duration of your audio files
+
+ # 🔹 Check whether a GPU is available
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ # 🔹 Load the processor and the model
+ processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
+ model = Wav2Vec2ForSequenceClassification.from_pretrained(
+     MODEL_NAME,
+     num_labels=NUM_LABELS,
+     problem_type="single_label_classification"
+ ).to(device)
+
+ # 🔹 Load the audio files without a CSV
+ def load_audio_data(data_dir):
+     data = {"file_path": [], "label": []}
+     labels = ["colere", "joie", "neutre"]  # Adjust to your classes
+
+     for label in labels:
+         folder_path = os.path.join(data_dir, label)
+         for file in os.listdir(folder_path):
+             if file.endswith(".wav"):
+                 data["file_path"].append(os.path.join(folder_path, file))
+                 data["label"].append(labels.index(label))
+
+     dataset = Dataset.from_dict(data)
+     train_test_split = dataset.train_test_split(test_size=0.2)  # 80% train, 20% test
+     return DatasetDict({"train": train_test_split["train"], "test": train_test_split["test"]})
+
+ # 🔹 Audio preprocessing
+ def preprocess_audio(file_path):
+     waveform, sample_rate = torchaudio.load(file_path)
+     inputs = processor(
+         waveform.squeeze().numpy(),
+         sampling_rate=sample_rate,
+         return_tensors="pt",
+         padding=True,
+         truncation=True,
+         max_length=MAX_LENGTH  # ✅ Fixes the earlier error
+     )
+     return inputs["input_values"][0]  # Return the preprocessed audio values
+
+ # 🔹 Load and preprocess the dataset
+ data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
+ ds = load_audio_data(data_dir)
+
+ def preprocess_batch(batch):
+     batch["input_values"] = preprocess_audio(batch["file_path"])
+     return batch
+
+ ds = ds.map(preprocess_batch, remove_columns=["file_path"])
+
+ # 🔹 Define the training arguments
+ training_args = TrainingArguments(
+     output_dir="./wav2vec2_emotion",
+     evaluation_strategy="epoch",
+     save_strategy="epoch",
+     learning_rate=LEARNING_RATE,
+     per_device_train_batch_size=BATCH_SIZE,
+     per_device_eval_batch_size=BATCH_SIZE,
+     num_train_epochs=EPOCHS,
+     save_total_limit=2,
+     logging_dir="./logs",
+     logging_steps=10,
+ )
+
+ # 🔹 Define the trainer
+ trainer = Trainer(
+     model=model,
+     args=training_args,
+     train_dataset=ds["train"],
+     eval_dataset=ds["test"],
+ )
+
+ # 🚀 Launch training
+ trainer.train()
utils.py DELETED
@@ -1,4 +0,0 @@
- import streamlit as st
- import datetime
-
-
{src/utils → utils}/__init__.py RENAMED
File without changes
{src/utils → utils}/dataset.py RENAMED
@@ -1,6 +1,6 @@
  import os
  from datasets import Dataset
- from src.config import LABELS
+ from config import LABELS
  import pandas as pd

  def load_audio_data(data_dir):
{src/utils → utils}/preprocessing.py RENAMED
@@ -3,8 +3,8 @@ import soundfile as sf
  import torch
  import torchaudio
  import numpy as np
- from src.model.feature_extractor import processor  # type: ignore
- from src.config import DEVICE
+ from model.feature_extractor import processor  # type: ignore
+ from config import DEVICE

  # Resampler to convert audio to 16 kHz
  resampler = torchaudio.transforms.Resample(orig_freq=48_000, new_freq=16_000)
views/about.py DELETED
@@ -1,21 +0,0 @@
- import streamlit as st
-
-
- def about():
-     st.title("About")
-
-     col1, col2 = st.columns(2)
-
-     with col1:
-         st.markdown("### About")
-         st.write("This dashboard is maintained by the M2 SISE team.")
-         st.write("For more information, please visit the [GitHub repository](https://github.com/jdalfons/sise-ultimate-challenge/tree/main).")
-
-     with col2:
-         st.markdown("### Collaborators")
-         st.write("""
-         - [Falonne Kpamegan](https://github.com/marinaKpamegan)
-         - [Nancy Randriamiarijaona](https://github.com/yminanc)
-         - [Cyril Kocab](https://github.com/Cyr-CK)
-         - [Juan Alfonso](https://github.com/jdalfons)
-         """)
views/emotion_analysis.py DELETED
@@ -1,150 +0,0 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import os
4
- import matplotlib.pyplot as plt
5
- import librosa
6
- from src.predict import predict_emotion
7
-
8
- DIRECTORY = "audios"
9
- FILE_NAME = "audio.wav"
10
- RATE = 16000
11
-
12
- def emotion_analysis():
13
-
14
- st.header("❤️ Emotion Analysis")
15
-
16
- if st.session_state.audio_file is None:
17
- st.info("Please, upload or record an audio file in the studio tab")
18
- st.stop()
19
- else:
20
- audio_file = st.session_state.audio_file
21
-
22
- start_inference = st.button("Start emotion recogniton","inf_on_upl_btn")
23
- emotion_labels = ["colere", "neutre", "joie"]
24
- colors = ['#f71c1c', '#cac8c8', '#f6d60a']
25
-
26
- if start_inference:
27
- # Configuration Streamlit
28
- with st.spinner("Real-time emotion analysis..."):
29
- # uploaded_file = st.file_uploader("Choisissez un fichier audio", type=["wav", "mp3"])
30
-
31
- if audio_file is not None:
32
- # Charger et rééchantillonner l'audio
33
- audio, sr = librosa.load(audio_file, sr=RATE)
34
- # chunk = audio_file
35
-
36
- # Paramètres de la fenêtre glissante
37
- window_size = 1 # 1 seconde de données
38
- hop_length = 0.5 # 0.5 secondes de chevauchement
39
-
40
- # Créer un graphique en temps réel
41
- fig, ax = plt.subplots()
42
- lines = [ax.plot([], [], label=emotion)[0] for emotion in emotion_labels]
43
- ax.set_ylim(0, 1)
44
- ax.set_xlim(0, len(audio) / sr)
45
- ax.set_xlabel("Temps (s)")
46
- ax.set_ylabel("Probabilité")
47
-
48
- chart = st.pyplot(fig)
49
-
50
- scores = [[],[],[]] # 3 émotions pour l'instant
51
-
52
- # Traitement par fenêtre glissante
53
- for i in range(0, len(audio), int(hop_length * sr)):
54
- chunk = audio[i:i + int(window_size * sr)]
55
- if len(chunk) < int(window_size * sr):
56
- break
57
-
58
- emotion_scores = predict_emotion(chunk, output_probs=True, sampling_rate=RATE)
59
-
60
- # Mettre à jour le graphique
61
- for emotion, line in zip(emotion_labels, lines):
62
- xdata = list(line.get_xdata())
63
- ydata = list(line.get_ydata())
64
- colour = colors[list(emotion_scores).index(emotion)]
65
- xdata.append(i / sr)
66
- ydata.append(emotion_scores[emotion])
67
- scores[list(emotion_scores).index(emotion)].append(emotion_scores[emotion])
68
- line.set_data(xdata, ydata)
69
- line.set_color(colour)
70
-
71
- ax.relim()
72
- ax.autoscale_view()
73
- ax.legend()
74
- chart.pyplot(fig, use_container_width=True)
75
-
76
- # Prepare the styling
77
- st.markdown("""
78
- <style>
79
- .colored-box {
80
- padding: 10px;
81
- border-radius: 5px;
82
- color: white;
83
- font-weight: bold;
84
- text-align: center;
85
- }
86
- </style>
87
- """
88
- , unsafe_allow_html=True)
89
-
90
- # Dynamically create the specified number of columns
91
- columns = st.columns(len(emotion_scores))
92
-
93
- # emotion_scores_mean = [sum(sublist) / len(sublist) for sublist in scores]
94
- emotion_scores_mean = {emotion:sum(sublist) / len(sublist) for emotion, sublist in zip(emotion_labels, scores)}
95
- max_emo = max(emotion_scores_mean)
96
- emotion_scores_sorted = dict(sorted(emotion_scores_mean.items(), key=lambda x: x[1], reverse=True))
97
- colors_sorted = [colors[list(emotion_scores_mean.keys()).index(key)] for key in list(emotion_scores_sorted.keys())]
98
-
99
- # Add content to each column
100
- for i, (col, emotion) in enumerate(zip(columns, emotion_scores_sorted)):
101
- color = colors_sorted[i % len(colors_sorted)] # Cycle through colors if more columns than colors
102
- col.markdown(f"""
103
- <div class="colored-box" style="background-color: {color};">
104
- {emotion} : {100*emotion_scores_sorted[emotion]:.2f} %
105
- </div>
106
- """
107
- , unsafe_allow_html=True)
108
-
109
-
110
-
111
- st.success("Analyse terminée !")
112
- else:
113
- st.warning("You need to load an audio file !")
114
-
115
- if start_inference:
116
-
117
- st.subheader("Feedback")
118
-
119
- # Initialisation du fichier CSV
120
- csv_file = os.path.join("src","predictions","feedback.csv")
121
-
122
- # Vérifier si le fichier CSV existe, sinon le créer avec des colonnes appropriées
123
- if not os.path.exists(csv_file):
124
- df = pd.DataFrame(columns=["filepath", "prediction", "feedback"])
125
- df.to_csv(csv_file, index=False)
126
-
127
- # Charger les données existantes du CSV
128
- df = pd.read_csv(csv_file)
129
-
130
- with st.form("feedback_form"):
131
- st.write("What should have been the correct prediction ? (*Choose the same emotion if the prediction was correct*).")
132
- feedback = st.selectbox("Your answer :", ['Sadness','Anger', 'Disgust', 'Fear', 'Surprise', 'Joy', 'Neutral'])
133
- submit_button = st.form_submit_button("Submit")
134
- st.write("En cliquant sur ce bouton, vous acceptez que votre audio soit sauvegardé dans notre base de données.")
135
-
136
- if submit_button:
137
- # Ajouter le feedback au DataFrame
138
- new_entry = pd.DataFrame([{"filepath": audio_file.name, "prediction": max_emo, "feedback": feedback}])
139
- # df = df.append(new_entry, ignore_index=True)
140
- df = pd.concat([df, new_entry], ignore_index=True)
141
-
142
- # Sauvegarder les données mises à jour dans le fichier CSV
143
- df.to_csv(csv_file, index=False)
144
-
145
- # Sauvegarder le fichier audio
146
- with open(os.path.join("src","predictions","data",audio_file.name), "wb") as f:
147
- f.write(audio_file.getbuffer())
148
-
149
- # Confirmation pour l'utilisateur
150
- st.success("Merci pour votre retour ! Vos données ont été sauvegardées.")
views/real_time.py DELETED
@@ -1,327 +0,0 @@
1
- ################################
2
- ### Real time prediction for real time record
3
- ###############################
4
-
5
- import streamlit as st
6
- import pyaudio
7
- import wave
8
- import torch
9
- from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
10
- import matplotlib.pyplot as plt
11
- import numpy as np
12
- import time
13
-
14
- # Paramètres audio
15
- CHUNK = 1024
16
- FORMAT = pyaudio.paInt16
17
- CHANNELS = 1
18
- RATE = 16000
19
-
20
- # Interface Streamlit
21
- st.title("Détection des émotions en temps réel")
22
-
23
- # Boutons pour démarrer et arrêter l'enregistrement
24
- start_button = st.button("Démarrer l'enregistrement")
25
- stop_button = st.button("Arrêter l'enregistrement")
26
-
27
- # Zone de visualisation des émotions en temps réel
28
- emotion_placeholder = st.empty()
29
- final_emotion_placeholder = st.empty()
30
-
31
- if start_button:
32
- st.write("Enregistrement en cours...")
33
- audio = pyaudio.PyAudio()
34
- stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
35
-
36
- frames = []
37
- real_time_emotions = []
38
-
39
- while not stop_button:
40
- data = stream.read(CHUNK)
41
- frames.append(data)
42
-
43
- # Traitement en temps réel (par tranche de 1 seconde)
44
- if len(frames) >= RATE // CHUNK:
45
- audio_segment = np.frombuffer(b''.join(frames[-(RATE // CHUNK):]), dtype=np.int16)
46
- emotion = predict_emotion(audio_segment, output_probs=False, sampling_rate=RATE)
47
- real_time_emotions.append(emotion)
48
- emotion_placeholder.line_chart(real_time_emotions) # Affichage graphique des émotions
49
-
50
- # Arrêt de l'enregistrement
51
- stream.stop_stream()
52
- stream.close()
53
- audio.terminate()
54
-
55
- # Sauvegarde de l'audio enregistré
56
- wf = wave.open("output.wav", "wb")
57
- wf.setnchannels(CHANNELS)
58
- wf.setsampwidth(audio.get_sample_size(FORMAT))
59
- wf.setframerate(RATE)
60
- wf.writeframes(b"".join(frames))
61
- wf.close()
62
-
63
- # Prédiction finale sur tout l'audio enregistré
64
- full_audio_data = np.frombuffer(b''.join(frames), dtype=np.int16)
65
- final_emotion = predict_emotion(full_audio_data)
66
-
67
- final_emotion_placeholder.write(f"Émotion finale prédite : {final_emotion}")
68
-
69
-
70
- ################################
71
- ### Real time prediction for uploaded audio file
72
- ###############################
73
- # Charger le modèle wav2vec et le processeur
74
-
75
- # # Configuration Streamlit
76
- # st.title("Analyse des émotions en temps réel")
77
- # uploaded_file = st.file_uploader("Choisissez un fichier audio", type=["wav", "mp3"])
78
-
79
- # if uploaded_file is not None:
80
- # # Charger et rééchantillonner l'audio
81
- # audio, sr = librosa.load(uploaded_file, sr=16000)
82
-
83
- # # Paramètres de la fenêtre glissante
84
- # window_size = 1 # en secondes
85
- # hop_length = 0.5 # en secondes
86
-
87
- # # Créer un graphique en temps réel
88
- # fig, ax = plt.subplots()
89
- # lines = [ax.plot([], [], label=emotion)[0] for emotion in emotions]
90
- # ax.set_ylim(0, 1)
91
- # ax.set_xlim(0, len(audio) / sr)
92
- # ax.set_xlabel("Temps (s)")
93
- # ax.set_ylabel("Probabilité")
94
- # ax.legend()
95
-
96
- # chart = st.pyplot(fig)
97
-
98
- # # Traitement par fenêtre glissante
99
- # for i in range(0, len(audio), int(hop_length * sr)):
100
- # chunk = audio[i:i + int(window_size * sr)]
101
- # if len(chunk) < int(window_size * sr):
102
- # break
103
-
104
- # emotion_scores = predict_emotion(chunk, output_probs=False, sampling_rate=RATE)
105
-
106
- # # Mettre à jour le graphique
107
- # for emotion, line in zip(emotions, lines):
108
- # xdata = line.get_xdata().tolist()
109
- # ydata = line.get_ydata().tolist()
110
- # xdata.append(i / sr)
111
- # ydata.append(emotion_scores[emotion])
112
- # line.set_data(xdata, ydata)
113
-
114
- # ax.relim()
115
- # ax.autoscale_view()
116
- # chart.pyplot(fig)
117
-
118
- # st.success("Analyse terminée !")
119
-
120
-
121
-
122
-
123
-
124
-
125
-
126
- ############################################
127
- ### Progress bar
128
- ############################################
129
-
130
- with st.status("Downloading data...", expanded=True) as status:
131
- st.write("Searching for data...")
132
- time.sleep(2)
133
- st.write("Found URL.")
134
- time.sleep(1)
135
- st.write("Downloading data...")
136
- time.sleep(1)
137
- status.update(
138
- label="Download complete!", state="complete", expanded=False
139
- )
140
-
141
- st.button("Rerun")
142
-
143
-
144
- ############################################
145
- ### Time duration estimation
146
- ############################################
147
- progress_bar = st.progress(0)
148
- time_placeholder = st.empty()
149
-
150
- total_time = 10 # Total estimated time in seconds
151
- for i in range(total_time):
152
- # Update progress bar
153
- progress_bar.progress((i + 1) / total_time)
154
-
155
- # Update time estimation
156
- remaining_time = total_time - i - 1
157
- time_placeholder.text(f"Estimated time remaining: {remaining_time} seconds")
158
-
159
- # Simulate task progress
160
- time.sleep(1)
161
-
162
-
163
-
164
- ############################################
165
- ### Audio file noise reduction
166
- ############################################
167
- from pydub import AudioSegment
168
- import noisereduce as nr
169
- from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
170
-
171
- # Fonction de réduction de bruit
172
- def reduce_noise(audio_data, sr):
173
- reduced_noise = nr.reduce_noise(y=audio_data, sr=sr)
174
- return reduced_noise
175
-
176
- # Chargement du modèle wav2vec
177
- processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
178
- model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
179
-
180
- # Interface Streamlit
181
- st.title("Application de transcription audio avec réduction de bruit")
182
-
183
- uploaded_file = st.file_uploader("Choisissez un fichier audio .wav", type="wav")
184
-
185
- if uploaded_file is not None:
186
- # Chargement et prétraitement de l'audio
187
- audio = AudioSegment.from_wav(uploaded_file)
188
- audio_array = np.array(audio.get_array_of_samples())
189
-
190
- # Réduction de bruit
191
- reduced_noise_audio = reduce_noise(audio_array, audio.frame_rate)
192
-
193
- # Traitement avec wav2vec
194
- input_values = processor(reduced_noise_audio, sampling_rate=audio.frame_rate, return_tensors="pt").input_values
195
-
196
- with torch.no_grad():
197
- logits = model(input_values).logits
198
-
199
- predicted_ids = torch.argmax(logits, dim=-1)
200
- transcription = processor.batch_decode(predicted_ids)[0]
201
-
202
- st.audio(uploaded_file, format="audio/wav")
203
- st.write("Transcription:")
204
- st.write(transcription)
205
-
206
-
207
- ############################################
208
- ### Choix des émotions
209
- ############################################
210
- # options = ['Sadness','Anger', 'Disgust', 'Fear', 'Surprise', 'Joy','Neutral']
211
- # selected_options = st.multiselect('What emotions do you want to be displayed', options, default=['Joy', 'Anger','Neutral])
212
-
213
-
214
- ############################################
215
- ### Transcription Speech2Text
216
- ############################################
217
- # # Fonction pour transcrire l'audio
218
- # def transcribe_audio(audio):
219
- # # Préparer les données d'entrée pour le modèle
220
- # input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
221
-
222
- # # Passer les données dans le modèle pour obtenir les logits
223
- # with torch.no_grad():
224
- # logits = model(input_values).logits
225
-
226
- # # Décoder les prédictions en texte
227
- # predicted_ids = torch.argmax(logits, dim=-1)
228
- # transcription = processor.batch_decode(predicted_ids)[0]
229
- # return transcription
230
-
231
- # # Charger et transcrire l'audio
232
- # # audio, rate = load_audio(audio_file_path) # (re)chargement de l'audio si nécessaire
233
- # transcription = transcribe_audio(audio)
234
-
235
- # # Afficher la transcription
236
- # print("Transcription :", transcription)
237
-
238
-
239
- ############################################
240
- ### Feedback
241
- ############################################
242
- import pandas as pd
243
- import os
244
-
245
- # Initialisation du fichier CSV
246
- csv_file = "predictions/feedback.csv"
247
-
248
- # Vérifier si le fichier CSV existe, sinon le créer avec des colonnes appropriées
249
- if not os.path.exists(csv_file):
250
- df = pd.DataFrame(columns=["filepath", "prediction", "feedback"])
251
- df.to_csv(csv_file, index=False)
252
-
253
- # Charger les données existantes du CSV
254
- df = pd.read_csv(csv_file)
255
-
256
- # Interface Streamlit
257
- st.title("Predicted emotion feedback")
258
-
259
- # Simuler une prédiction pour l'exemple (remplacez par votre modèle réel)
260
- audio_file_name = "example_audio.wav"
261
- predicted_emotion = "Joie" # Exemple de prédiction
262
-
263
- st.write(f"Fichier audio : {audio_file_name}")
264
- st.write(f"Émotion détectée : {predicted_emotion}")
265
-
266
- # Formulaire de feedback
267
- with st.form("feedback_form"):
268
- st.write("Est-ce la bonne émotion qui a été détectée ? Cochez la réelle émotion.")
269
- feedback = st.selectbox("Votre réponse :", ['Sadness','Anger', 'Disgust', 'Fear', 'Surprise', 'Joy', 'Neutral'])
270
- submit_button = st.form_submit_button("Soumettre")
271
- st.write("En cliquant sur ce bouton, vous acceptez que votre audio soit sauvegardé dans notre base de données.")
272
-
273
- if submit_button:
274
- # Ajouter le feedback au DataFrame
275
- new_entry = {"filepath": audio_file_name, "prediction": predicted_emotion, "feedback": feedback}
276
- df = df.append(new_entry, ignore_index=True)
277
-
278
- # Sauvegarder les données mises à jour dans le fichier CSV
279
- df.to_csv(csv_file, index=False)
280
-
281
- # Sauvegarder le fichier audio
282
- with open("predictions/data", "wb") as f:
283
- f.write(uploaded_file.getbuffer())
284
-
285
- # Confirmation pour l'utilisateur
286
- st.success("Merci pour votre retour ! Vos données ont été sauvegardées.")
287
-
288
- # Afficher les données sauvegardées (optionnel)
289
- # st.write("Données collectées jusqu'à présent :")
290
- # st.dataframe(df)
291
-
292
-
293
-
294
-
295
-
296
-
297
-
298
-
299
-
300
-
301
-
302
-
303
-
304
-
305
-
306
- ############################################
307
- ### Predict proba (to replace in predict.py)
308
- ############################################
309
- import librosa
310
- def predict_emotion_probabilities(audio_path):
311
- waveform, _ = librosa.load(audio_path, sr=16000)
312
- input_values = processor(waveform, return_tensors="pt", sampling_rate=16000).input_values
313
- input_values = input_values.to(device)
314
-
315
- with torch.no_grad():
316
- outputs = model(input_values)
317
-
318
- # Appliquer softmax pour obtenir des probabilités
319
- probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
320
-
321
- # Convertir en numpy array et prendre le premier (et seul) élément
322
- probabilities = probabilities[0].detach().cpu().numpy()
323
-
324
- # Créer un dictionnaire associant chaque émotion à sa probabilité
325
- emotion_probabilities = {emotion: prob for emotion, prob in zip(emotion_labels, probabilities)}
326
-
327
- return emotion_probabilities
views/studio.py DELETED
@@ -1,176 +0,0 @@
1
- import streamlit as st
2
- from st_audiorec import st_audiorec
3
-
4
- from src.model.transcriber import transcribe_audio
5
-
6
-
7
- def studio():
8
- st.title("SISE ultimate challenge")
9
- st.write("C'est le dernier challenge de la formation SISE.")
10
- st.markdown("""
11
- **Overview:**
12
- - Analyse de logs
13
- - Analyse de données
14
- - Machine learning
15
- """)
16
-
17
- st.markdown("---")
18
-
19
- st.header("🎧 Audio File Studio")
20
-
21
- tab1, tab2, tab3 = st.tabs(["⬆️ Record Audio", "🔈 Realtime Audio", "📝 Transcription"])
22
-
23
- with tab1:
24
- st.header("⬆️ Upload Audio Record")
25
- st.write("Here you can upload a pre-recorded audio.")
26
- audio_file = st.file_uploader("Upload an audio file", type=["wav"])
27
-
28
- if "audio_file" not in st.session_state:
29
- st.session_state.audio_file = None
30
-
31
- if audio_file is not None:
32
- st.success("Audio file uploaded successfully !")
33
- st.session_state.audio_file = audio_file
34
-
35
- # with open(os.path.join(DIRECTORY,FILE_NAME), "wb") as f:
36
- # f.write(audio_file.getbuffer())
37
- # st.success(f"Saved file: {FILE_NAME}")
38
-
39
-
40
-
41
- with tab2:
42
- st.header("🔈 Realtime Audio Record")
43
- st.write("Here you can record an audio.")
44
-
45
- if "audio_file" not in st.session_state:
46
- st.session_state.audio_file = None
47
-
48
- audio_file = st_audiorec()
49
-
50
- if audio_file is not None:
51
- st.audio(audio_file, format='audio/wav')
52
- st.success("Audio recorded successfully !")
53
- st.session_state.audio_file = audio_file
54
-
55
- ##############################################"realtime audio record"##############################################
56
- # Boutons pour démarrer et arrêter l'enregistrement
57
- # start_button = st.button("Démarrer l'enregistrement")
58
- # stop_button = st.button("Arrêter l'enregistrement")
59
- # start_stop = st.button("Démarrer/Arrêter l'enregistrement")
60
-
61
-
62
- # Zone de visualisation des émotions en temps réel
63
- # emotion_placeholder = st.empty()
64
- # final_emotion_placeholder = st.empty()
65
- # audio = pyaudio.PyAudio()
66
- # audio_buffer = np.array([])
67
- # emotion_prediction = "Aucune prédiction"
68
- # is_recording = False
69
-
70
- # if start_stop:
71
- # is_recording = not is_recording
72
-
73
- # # Variables globales pour le partage de données entre threads
74
- # def audio_callback(in_data, frame_count, time_info, status):
75
- # global audio_buffer
76
- # audio_data = np.frombuffer(in_data, dtype=np.float32)
77
- # audio_buffer = np.concatenate((audio_buffer, audio_data))
78
- # return (in_data, pyaudio.paContinue)
79
-
80
- # def predict_emotion_thread():
81
- # global audio_buffer, emotion_prediction
82
- # while is_recording:
83
- # if len(audio_buffer) >= CHUNK:
84
- # chunk = audio_buffer[:CHUNK]
85
- # audio_buffer = audio_buffer[STRIDE:]
86
- # emotion_prediction = predict_emotion(chunk, output_probs=False, sampling_rate=RATE) # Utilisez votre modèle ici
87
- # # time.sleep(0.1)
88
-
89
- # if is_recording:
90
- # audio_buffer = np.array([])
91
- # stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True,
92
- # frames_per_buffer=CHUNK, stream_callback=audio_callback)
93
- # stream.start_stream()
94
- # threading.Thread(target=predict_emotion_thread, daemon=True).start()
95
- # st.write("Enregistrement en cours...")
96
- # else:
97
- # stream.stop_stream()
98
- # stream.close()
99
- # st.write("Enregistrement arrêté.")
100
-
101
- # emotion_display = st.empty()
102
-
103
- # while is_recording:
104
- # emotion_display.write(f"Émotion détectée : {emotion_prediction}")
105
- # # time.sleep(0.1)
106
-
107
- # audio.terminate(
108
-
109
-
110
- # stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
111
-
112
- # frames = []
113
- # real_time_emotions = []
114
-
115
- # while not stop_button:
116
- # data = stream.read(CHUNK)
117
- # frames.append(data)
118
-
119
- # # Traitement en temps réel (par tranche de 1 seconde)
120
- # if len(frames) >= RATE // CHUNK:
121
- # audio_segment = np.frombuffer(b''.join(frames[-(RATE // CHUNK):]), dtype=np.int16)
122
- # emotion = predict_emotion(audio_segment, output_probs=False, sampling_rate=RATE)
123
- # real_time_emotions.append(emotion)
124
- # emotion_placeholder.line_chart(real_time_emotions) # Affichage graphique des émotions
125
-
126
- # # Arrêt de l'enregistrement
127
- # stream.stop_stream()
128
- # stream.close()
129
- # audio.terminate()
130
-
131
- # # Sauvegarde de l'audio enregistré
132
- # wf = wave.open("output.wav", "wb")
133
- # wf.setnchannels(CHANNELS)
134
- # wf.setsampwidth(audio.get_sample_size(FORMAT))
135
- # wf.setframerate(RATE)
136
- # wf.writeframes(b"".join(frames))
137
- # wf.close()
138
-
139
- # # Prédiction finale sur tout l'audio enregistré
140
- # full_audio_data = np.frombuffer(b''.join(frames), dtype=np.int16)
141
- # final_emotion = predict_emotion(full_audio_data)
142
-
143
- # final_emotion_placeholder.write(f"Émotion finale prédite : {final_emotion}")
144
-
145
-
146
- ##############################################"end realtime audio record"##############################################
147
-
148
- with tab3:
149
- st.header("📝 Speech2Text Transcription")
150
- st.write("Here you can get the audio transcript.")
151
-
152
- save = st.checkbox("Save transcription to .txt", value=False, key="save-transcript")
153
-
154
- ############################# A décommenté quand ce sera débogué
155
- if st.button("Transcribe", key="transcribe-button"):
156
- # Fonction pour transcrire l'audio
157
- transcription = transcribe_audio(st.audio)
158
-
159
- # Charger et transcrire l'audio
160
- # audio, rate = load_audio(audio_file_path) # (re)chargement de l'audio si nécessaire
161
- transcription = transcribe_audio(audio_file, sampling_rate=16000)
162
-
163
- # Afficher la transcription
164
- st.write("Transcription :", transcription)
165
-
166
- st.success("Audio registered successfully.")
167
- if save:
168
- file_path = "transcript.txt"
169
-
170
- # Write the text to the file
171
- with open(file_path, "w") as file:
172
- file.write(transcription)
173
-
174
- st.success(f"Text saved to {file_path}")
175
-
176
-