:rocket: feature new interface
- .gitignore +1 -1
- .streamlit/config.toml +1 -1
- app.py +243 -40
- src/config.py → config.py +1 -1
- img copy/logo_01.png +0 -0
- __init__.py → model/__init__.py +0 -0
- {src/model → model}/emotion_classifier.py +0 -0
- {src/model → model}/feature_extractor.py +1 -1
- {src/model → model}/transcriber.py +0 -0
- src/predict.py → predict.py +2 -2
- src/__init__.py +0 -0
- src/data/dataset.csv +0 -113
- src/data/processing.ipynb +0 -113
- src/model/__init__.py +0 -0
- src/predictions/feedback.csv +0 -1
- src/test_backend.ipynb +0 -63
- test_speech.py +49 -0
- src/train.py → train.py +0 -0
- train_speech.py +88 -0
- utils.py +0 -4
- {src/utils → utils}/__init__.py +0 -0
- {src/utils → utils}/dataset.py +1 -1
- {src/utils → utils}/preprocessing.py +2 -2
- views/about.py +0 -21
- views/emotion_analysis.py +0 -150
- views/real_time.py +0 -327
- views/studio.py +0 -176
.gitignore
CHANGED
@@ -180,7 +180,7 @@ old/
 *.wav
 data/*
 *.pth
-
+old/
 # Mac
 .DS_Store
 .idea
.streamlit/config.toml
CHANGED
@@ -1,4 +1,4 @@
 [theme]
-base="
+base="light"
 primaryColor="#7c99b4"
 
app.py
CHANGED
@@ -1,45 +1,248 @@
 import streamlit as st
-
-
-from views.emotion_analysis import emotion_analysis
-from views.about import about
+import pandas as pd
+import numpy as np
 import os
-import
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+import time
+import matplotlib.pyplot as plt
+from datetime import datetime
+import tempfile
+import io
+import json
+from model.transcriber import transcribe_audio
+from predict import predict_emotion
+
+# You'll need to install this package:
+# pip install streamlit-audiorec
+from st_audiorec import st_audiorec
+
+# Page configuration
+st.set_page_config(
+    page_title="Emotion Analyser",
+    page_icon="🎤",
+    layout="wide"
+)
+
+# Initialize session state variables if they don't exist
+if 'audio_data' not in st.session_state:
+    st.session_state.audio_data = []
+if 'current_audio_index' not in st.session_state:
+    st.session_state.current_audio_index = -1
+if 'audio_history_csv' not in st.session_state:
+    # Define columns for our CSV storage
+    st.session_state.audio_history_csv = pd.DataFrame(
+        columns=['timestamp', 'file_path', 'transcription', 'emotion', 'probabilities']
+    )
+if 'needs_rerun' not in st.session_state:
+    st.session_state.needs_rerun = False
+
+# Function to ensure we keep only the last 10 entries
+def update_audio_history(new_entry):
+    # Add the new entry
+    st.session_state.audio_history_csv = pd.concat([st.session_state.audio_history_csv, pd.DataFrame([new_entry])], ignore_index=True)
+
+    # Keep only the last 10 entries
+    if len(st.session_state.audio_history_csv) > 10:
+        st.session_state.audio_history_csv = st.session_state.audio_history_csv.iloc[-10:]
+
+    # Save to CSV
+    st.session_state.audio_history_csv.to_csv('audio_history.csv', index=False)
+
+# Function to process audio and get results
+def process_audio(audio_path):
+    try:
+        # Get transcription
+        transcription = transcribe_audio(audio_path)
+
+        # Get emotion prediction
+        predicted_emotion, probabilities = predict_emotion(audio_path)
+
+        # Update audio history
+        new_entry = {
+            'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            'file_path': audio_path,
+            'transcription': transcription,
+            'emotion': predicted_emotion,
+            'probabilities': str(probabilities)  # Convert dict to string for storage
+        }
+        update_audio_history(new_entry)
+
+        # Update current index
+        st.session_state.current_audio_index = len(st.session_state.audio_history_csv) - 1
+
+        return transcription, predicted_emotion, probabilities
+    except Exception as e:
+        st.error(f"Error processing audio: {str(e)}")
+        return None, None, None
+
+# Function to split audio into 10-second segments
+def split_audio(audio_file, segment_length=10):
+    # This is a placeholder - in a real implementation, you'd use a library like pydub
+    # to split the audio file into segments
+    st.warning("Audio splitting functionality is a placeholder. Implement with pydub or similar library.")
+    # For now, we'll just return the whole file as a single segment
+    return [audio_file]
+
+# Function to display emotion visualization
+def display_emotion_chart(probabilities):
+    emotions = list(probabilities.keys())
+    values = list(probabilities.values())
+
+    fig, ax = plt.subplots(figsize=(10, 5))
+    bars = ax.bar(emotions, values, color=['red', 'gray', 'green'])
+
+    # Add data labels on top of bars
+    for bar in bars:
+        height = bar.get_height()
+        ax.text(bar.get_x() + bar.get_width()/2., height + 0.02,
+                f'{height:.2f}', ha='center', va='bottom')
+
+    ax.set_ylim(0, 1.1)
+    ax.set_ylabel('Probability')
+    ax.set_title('Emotion Prediction Results')
+
+    st.pyplot(fig)
+
+# Trigger rerun if needed (replaces experimental_rerun)
+if st.session_state.needs_rerun:
+    st.session_state.needs_rerun = False
+    st.rerun()  # Using st.rerun() instead of experimental_rerun
+
+# Main App Layout
+st.image("./img/logo_01.png", width=400)
+
+# Create two columns for the main layout
+col1, col2 = st.columns([1, 1])
+
+with col1:
+    st.header("Audio Input")
+
+    # Method selection
+
+    tab1, tab2 = st.tabs(["Record Audio", "Upload Audio"])
+
+    with tab1:
+        st.write("Record your audio (max 10 seconds):")
+
+        # Using streamlit-audiorec for better recording functionality
+        wav_audio_data = st_audiorec()
+
+        if wav_audio_data is not None:
+            # Save the recorded audio to a temporary file
+            with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
+                tmp_file.write(wav_audio_data)
+                tmp_file_path = tmp_file.name
+
+            st.success("Audio recorded successfully!")
+
+            # Process button
+            if st.button("Process Recorded Audio"):
+                # Process the audio
+                with st.spinner("Processing audio..."):
+                    transcription, emotion, probs = process_audio(tmp_file_path)
+                    # Set flag for rerun instead of calling experimental_rerun
+                    if transcription is not None:
+                        st.success("Audio processed successfully!")
+                        st.session_state.needs_rerun = True
+
+    with tab2:
+        uploaded_file = st.file_uploader("Upload an audio file (WAV format)", type=['wav'])
+
+        if uploaded_file is not None:
+            # Save the uploaded file to a temporary location
+            with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
+                tmp_file.write(uploaded_file.getbuffer())
+                tmp_file_path = tmp_file.name
+
+            st.audio(uploaded_file, format="audio/wav")
+
+            # Process button
+            if st.button("Process Uploaded Audio"):
+                # Split audio into 10-second segments
+                with st.spinner("Processing audio..."):
+                    segments = split_audio(tmp_file_path)
+
+                    # Process each segment
+                    for i, segment_path in enumerate(segments):
+                        st.write(f"Processing segment {i+1}...")
+                        transcription, emotion, probs = process_audio(segment_path)
+
+                    # Set flag for rerun instead of calling experimental_rerun
+                    st.success("Audio processed successfully!")
+                    st.session_state.needs_rerun = True
+
+with col2:
+    st.header("Results")
+
+    # Display results if available
+    if st.session_state.current_audio_index >= 0 and len(st.session_state.audio_history_csv) > 0:
+        current_data = st.session_state.audio_history_csv.iloc[st.session_state.current_audio_index]
+
+        # Transcription
+        st.subheader("Transcription")
+        st.text_area("", value=current_data['transcription'], height=100, key="transcription_area")
+
+        # Emotion
+        st.subheader("Detected Emotion")
+        st.info(f"🎭 Predicted emotion: **{current_data['emotion']}**")
+
+        # Convert string representation of dict back to actual dict
+        try:
+            import ast
+            probs = ast.literal_eval(current_data['probabilities'])
+            display_emotion_chart(probs)
+        except Exception as e:
+            st.error(f"Error parsing probabilities: {str(e)}")
+            st.write(f"Raw probabilities: {current_data['probabilities']}")
+    else:
+        st.info("Record or upload audio to see results")
+
+# Audio History and Analytics Section
+st.header("Audio History and Analytics")
+
+if len(st.session_state.audio_history_csv) > 0:
+    # Display a select box to choose from audio history
+    timestamps = st.session_state.audio_history_csv['timestamp'].tolist()
+    selected_timestamp = st.selectbox(
+        "Select audio from history:",
+        options=timestamps,
+        index=len(timestamps) - 1  # Default to most recent
     )
 
+    # Update current index when selection changes
+    selected_index = st.session_state.audio_history_csv[
+        st.session_state.audio_history_csv['timestamp'] == selected_timestamp
+    ].index[0]
+
+    # Only update if different
+    if st.session_state.current_audio_index != selected_index:
+        st.session_state.current_audio_index = selected_index
+        st.session_state.needs_rerun = True
 
-
-
-
-
-
-
-
+    # Analytics button
+    if st.button("Run Analytics on Selected Audio"):
+        st.subheader("Analytics Results")
+
+        # Get the selected audio data
+        selected_data = st.session_state.audio_history_csv.iloc[selected_index]
+
+        # Display analytics (this is where you would add more sophisticated analytics)
+        st.write(f"Selected Audio: {selected_data['timestamp']}")
+        st.write(f"Emotion: {selected_data['emotion']}")
+        st.write(f"File Path: {selected_data['file_path']}")
+
+        # Add any additional analytics you want here
+
+        # Try to play the selected audio
+        try:
+            if os.path.exists(selected_data['file_path']):
+                st.audio(selected_data['file_path'], format="audio/wav")
+            else:
+                st.warning("Audio file not found - it may have been deleted or moved.")
+        except Exception as e:
+            st.error(f"Error playing audio: {str(e)}")
+else:
+    st.info("No audio history available. Record or upload audio to create history.")
+
+# Footer
+st.markdown("---")
+st.caption("Audio Emotion Analyzer - Processes audio in 10-second segments and predicts emotions")
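The new app.py leaves split_audio() as a placeholder and itself points at pydub. A minimal sketch of what that helper could look like, assuming pydub is installed (pip install pydub) and that returning temporary .wav paths is acceptable to process_audio(); this is an illustration, not part of the commit:

import tempfile
from pydub import AudioSegment

def split_audio(audio_file, segment_length=10):
    # Cut a WAV file into chunks of at most `segment_length` seconds and
    # return the paths of the temporary files holding each chunk.
    audio = AudioSegment.from_wav(audio_file)
    segment_ms = segment_length * 1000
    segment_paths = []
    for start in range(0, len(audio), segment_ms):
        chunk = audio[start:start + segment_ms]
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            chunk.export(tmp.name, format="wav")
            segment_paths.append(tmp.name)
    return segment_paths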
src/config.py → config.py
RENAMED
@@ -21,5 +21,5 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 MODEL_NAME = "facebook/wav2vec2-large-xlsr-53-french"
 
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-BEST_MODEL_NAME = os.path.join(BASE_DIR, "
+BEST_MODEL_NAME = os.path.join(BASE_DIR, "model", "fr-speech-emotion-model.pth")  # goes up one level to reach the root
 
img copy/logo_01.png
ADDED
__init__.py → model/__init__.py
RENAMED
File without changes
{src/model → model}/emotion_classifier.py
RENAMED
File without changes
{src/model → model}/feature_extractor.py
RENAMED
@@ -1,6 +1,6 @@
 import torch
 from transformers import Wav2Vec2Model, Wav2Vec2Processor
-from
+from config import MODEL_NAME, DEVICE
 
 processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
 feature_extractor = Wav2Vec2Model.from_pretrained(MODEL_NAME).to(DEVICE)
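For reference, a hedged sketch of how the processor / feature_extractor pair defined in this file might be used to pull wav2vec2 embeddings from a 16 kHz waveform; the helper name and the mean pooling are assumptions, not code from this repository:

import torch

def extract_features(waveform_16k):
    # waveform_16k: 1-D numpy array sampled at 16 kHz (assumption)
    inputs = processor(waveform_16k, sampling_rate=16_000, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        hidden_states = feature_extractor(**inputs).last_hidden_state  # (1, time, hidden)
    return hidden_states.mean(dim=1)  # simple mean pooling over time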
{src/model → model}/transcriber.py
RENAMED
File without changes
src/predict.py → predict.py
RENAMED
@@ -4,8 +4,8 @@ import torch
 import librosa
 import numpy as np
 from model.emotion_classifier import EmotionClassifier
-from
-from
+from utils.preprocessing import collate_fn
+from config import DEVICE, NUM_LABELS, BEST_MODEL_NAME
 
 # Load the trained model
 feature_dim = 40  # Number of MFCCs used
src/__init__.py
DELETED
File without changes
src/data/dataset.csv
DELETED
@@ -1,113 +0,0 @@
-dossier,emotion
-colere/c1ac.wav,colere
-colere/c1af.wav,colere
-colere/c1aj.wav,colere
-colere/c1an.wav,colere
-colere/c1bc.wav,colere
-colere/c1bf.wav,colere
-colere/c1bj.wav,colere
-colere/c1bn.wav,colere
-colere/c1cc.wav,colere
-colere/c1cf.wav,colere
-colere/c1cj.wav,colere
-colere/c2ac.wav,colere
-colere/c2af.wav,colere
-colere/c2aj.wav,colere
-colere/c2an.wav,colere
-colere/c2bc.wav,colere
-colere/c2bf.wav,colere
-colere/c2bj.wav,colere
-colere/c2bn.wav,colere
-colere/c2cn.wav,colere
-colere/c3ac.wav,colere
-colere/c3af.wav,colere
-colere/c3aj.wav,colere
-colere/c3an.wav,colere
-colere/c3bc.wav,colere
-colere/c3bf.wav,colere
-colere/c3bj.wav,colere
-colere/c3bn.wav,colere
-colere/c4aaf.wav,colere
-colere/c4ac.wav,colere
-colere/c4af.wav,colere
-colere/c4aj.wav,colere
-colere/c4an.wav,colere
-colere/c4bc.wav,colere
-colere/c4bj.wav,colere
-colere/c4bn.wav,colere
-colere/c5an.wav,colere
-colere/c5c.wav,colere
-colere/c5f.wav,colere
-colere/c5j.wav,colere
-neutre/n1ac.wav,neutre
-neutre/n1af.wav,neutre
-neutre/n1aj.wav,neutre
-neutre/n1an.wav,neutre
-neutre/n1bc.wav,neutre
-neutre/n1bf.wav,neutre
-neutre/n1bj.wav,neutre
-neutre/n1bn.wav,neutre
-neutre/n2ac.wav,neutre
-neutre/n2af.wav,neutre
-neutre/n2aj.wav,neutre
-neutre/n2an.wav,neutre
-neutre/n2bc.wav,neutre
-neutre/n2bf.wav,neutre
-neutre/n2bj.wav,neutre
-neutre/n2bn.wav,neutre
-neutre/n3ac.wav,neutre
-neutre/n3af.wav,neutre
-neutre/n3aj.wav,neutre
-neutre/n3an.wav,neutre
-neutre/n3bc.wav,neutre
-neutre/n3bf.wav,neutre
-neutre/n3bj.wav,neutre
-neutre/n3bn.wav,neutre
-neutre/n4ac.wav,neutre
-neutre/n4aj.wav,neutre
-neutre/n4an.wav,neutre
-neutre/n4f.wav,neutre
-neutre/n5ac.wav,neutre
-neutre/n5af.wav,neutre
-neutre/n5aj.wav,neutre
-neutre/n5an.wav,neutre
-neutre/n5bc.wav,neutre
-neutre/n5bf.wav,neutre
-neutre/n5bj.wav,neutre
-neutre/n5bn.wav,neutre
-joie/h1ac.wav,joie
-joie/h1af.wav,joie
-joie/h1aj.wav,joie
-joie/h1an.wav,joie
-joie/h1bc.wav,joie
-joie/h1bf.wav,joie
-joie/h1bj.wav,joie
-joie/h1bn.wav,joie
-joie/h21f.wav,joie
-joie/h2ac.wav,joie
-joie/h2aj.wav,joie
-joie/h2an.wav,joie
-joie/h2bc.wav,joie
-joie/h2bf.wav,joie
-joie/h2bj.wav,joie
-joie/h2bn.wav,joie
-joie/h3ac.wav,joie
-joie/h3af.wav,joie
-joie/h3aj.wav,joie
-joie/h3anwav.wav,joie
-joie/h3bc.wav,joie
-joie/h3bf.wav,joie
-joie/h3bj.wav,joie
-joie/h3bn.wav,joie
-joie/h4ac.wav,joie
-joie/h4af.wav,joie
-joie/h4aj.wav,joie
-joie/h4an.wav,joie
-joie/h4bc.wav,joie
-joie/h4bf.wav,joie
-joie/h4bj.wav,joie
-joie/h4bn.wav,joie
-joie/h5an.wav,joie
-joie/h5c.wav,joie
-joie/h5f.wav,joie
-joie/h5j.wav,joie
src/data/processing.ipynb
DELETED
@@ -1,113 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "RangeIndex: 112 entries, 0 to 111\n",
-      "Data columns (total 2 columns):\n",
-      " #   Column   Non-Null Count  Dtype \n",
-      "---  ------   --------------  ----- \n",
-      " 0   dossier  112 non-null    object\n",
-      " 1   emotion  112 non-null    object\n",
-      "dtypes: object(2)\n",
-      "memory usage: 1.9+ KB\n"
-     ]
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "\n",
-    "data = pd.read_csv('dataset.csv', sep=',', header=0)\n",
-    "\n",
-    "data.info()\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "             dossier emotion\n",
-      "0    colere/c1ac.wav  colere\n",
-      "1    colere/c1af.wav  colere\n",
-      "2    colere/c1aj.wav  colere\n",
-      "3    colere/c1an.wav  colere\n",
-      "4    colere/c1bc.wav  colere\n",
-      "..               ...     ...\n",
-      "107    joie/h4bn.wav    joie\n",
-      "108    joie/h5an.wav    joie\n",
-      "109     joie/h5c.wav    joie\n",
-      "110     joie/h5f.wav    joie\n",
-      "111     joie/h5j.wav    joie\n",
-      "\n",
-      "[112 rows x 2 columns]\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\Evidya\\AppData\\Local\\Temp\\ipykernel_24704\\3726049179.py:8: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n",
-      "  data = data.applymap(lambda x: x.strip() if isinstance(x, str) else x)\n"
-     ]
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "\n",
-    "\n",
-    "# 🔹 1. Supprimer les espaces des noms de colonnes\n",
-    "data.columns = data.columns.str.strip()\n",
-    "\n",
-    "# 🔹 2. Supprimer les espaces dans toutes les cellules (colonnes object)\n",
-    "data = data.applymap(lambda x: x.strip() if isinstance(x, str) else x)\n",
-    "\n",
-    "# Afficher le DataFrame corrigé\n",
-    "print(data)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# save to csv\n",
-    "data.to_csv('dataset.csv', index=False, sep=',')"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
src/model/__init__.py
DELETED
File without changes
src/predictions/feedback.csv
DELETED
@@ -1 +0,0 @@
-filepath,prediction,feedback
src/test_backend.ipynb
DELETED
@@ -1,63 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Transcription : tu as encore oublié de faire le dossier c'était hurgent nom de chien\n"
-     ]
-    }
-   ],
-   "source": [
-    "# make a transcription from audio file\n",
-    "from model.transcriber import transcribe_audio\n",
-    "import os\n",
-    "\n",
-    "base_path = os.path.abspath(os.path.join(\"data\"))\n",
-    "audio_path = os.path.join(base_path, \"colere\", \"c1af.wav\") # path to audio file\n",
-    "texte = transcribe_audio(audio_path)\n",
-    "print(f\"Transcription : {texte}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from predict import predict_emotion\n",
-    "\n",
-    "base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), \"data\"))\n",
-    "audio_file = os.path.join(base_path, \"colere\", \"c1ac.wav\")\n",
-    "emotion = predict_emotion(audio_file)\n",
-    "print(f\"🎤 L'émotion prédite est : {emotion}\")"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
test_speech.py
ADDED
@@ -0,0 +1,49 @@
+import torch
+import torchaudio
+from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
+import os
+
+# 🔹 Parameters
+MODEL_NAME = "./wav2vec2_emotion"  # Path to the saved model
+LABELS = ["colere", "joie", "neutre"]  # The classes
+
+# 🔹 Load the processor and the model
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
+model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_NAME).to(device)
+model.eval()  # Evaluation mode
+
+
+def predict_emotion(audio_path):
+    # Load the audio
+    waveform, sample_rate = torchaudio.load(audio_path)
+
+    # Preprocess the sound
+    inputs = processor(
+        waveform.squeeze().numpy(),
+        sampling_rate=sample_rate,
+        return_tensors="pt",
+        padding=True,
+        truncation=True,
+        max_length=32000  # Adjust to the duration of your files
+    )
+
+    # Send the data to the right device (CPU or GPU)
+    input_values = inputs["input_values"].to(device)
+
+    # Prediction
+    with torch.no_grad():
+        logits = model(input_values).logits
+
+    # Find the predicted emotion
+    predicted_class = torch.argmax(logits, dim=-1).item()
+
+    return LABELS[predicted_class]  # Return the corresponding label
+
+base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
+audio_file = os.path.join(base_path, "colere", "c1ac.wav")
+predicted_emotion = predict_emotion(audio_file)
+print(f"🎙️ Émotion prédite : {predicted_emotion}")
+
+
+
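test_speech.py feeds the file's native sample rate straight to the processor, while wav2vec2 checkpoints expect 16 kHz input (utils/preprocessing.py already defines a 48 kHz → 16 kHz resampler). A small, hedged sketch of a loader that resamples first; wiring it into predict_emotion() is an assumption, not part of this commit:

import torchaudio

def load_waveform_16k(audio_path):
    # Load the audio and resample to 16 kHz when the source rate differs.
    waveform, sample_rate = torchaudio.load(audio_path)
    if sample_rate != 16_000:
        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16_000)
        waveform = resampler(waveform)
    return waveform, 16_000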
src/train.py → train.py
RENAMED
File without changes
train_speech.py
ADDED
@@ -0,0 +1,88 @@
+import torch
+import torchaudio
+import os
+from datasets import Dataset, DatasetDict
+from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification, TrainingArguments, Trainer
+
+# 🔹 Parameters
+MODEL_NAME = "facebook/wav2vec2-large-xlsr-53-french"
+NUM_LABELS = 3  # Number of emotion classes
+BATCH_SIZE = 8
+EPOCHS = 10
+LEARNING_RATE = 1e-4
+MAX_LENGTH = 32000  # Adjust to the duration of your audio files
+
+# 🔹 Check whether a GPU is available
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# 🔹 Load the processor and the model
+processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
+model = Wav2Vec2ForSequenceClassification.from_pretrained(
+    MODEL_NAME,
+    num_labels=NUM_LABELS,
+    problem_type="single_label_classification"
+).to(device)
+
+# 🔹 Function to load the audio files without a CSV
+def load_audio_data(data_dir):
+    data = {"file_path": [], "label": []}
+    labels = ["colere", "joie", "neutre"]  # Adjust to your classes
+
+    for label in labels:
+        folder_path = os.path.join(data_dir, label)
+        for file in os.listdir(folder_path):
+            if file.endswith(".wav"):
+                data["file_path"].append(os.path.join(folder_path, file))
+                data["label"].append(labels.index(label))
+
+    dataset = Dataset.from_dict(data)
+    train_test_split = dataset.train_test_split(test_size=0.2)  # 80% train, 20% test
+    return DatasetDict({"train": train_test_split["train"], "test": train_test_split["test"]})
+
+# 🔹 Audio preprocessing
+def preprocess_audio(file_path):
+    waveform, sample_rate = torchaudio.load(file_path)
+    inputs = processor(
+        waveform.squeeze().numpy(),
+        sampling_rate=sample_rate,
+        return_tensors="pt",
+        padding=True,
+        truncation=True,
+        max_length=MAX_LENGTH  # ✅ fixes the earlier error
+    )
+    return inputs["input_values"][0]  # Get the preprocessed audio values
+
+# 🔹 Load and preprocess the dataset
+data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
+ds = load_audio_data(data_dir)
+
+def preprocess_batch(batch):
+    batch["input_values"] = preprocess_audio(batch["file_path"])
+    return batch
+
+ds = ds.map(preprocess_batch, remove_columns=["file_path"])
+
+# 🔹 Define the training arguments
+training_args = TrainingArguments(
+    output_dir="./wav2vec2_emotion",
+    evaluation_strategy="epoch",
+    save_strategy="epoch",
+    learning_rate=LEARNING_RATE,
+    per_device_train_batch_size=BATCH_SIZE,
+    per_device_eval_batch_size=BATCH_SIZE,
+    num_train_epochs=EPOCHS,
+    save_total_limit=2,
+    logging_dir="./logs",
+    logging_steps=10,
+)
+
+# 🔹 Define the trainer
+trainer = Trainer(
+    model=model,
+    args=training_args,
+    train_dataset=ds["train"],
+    eval_dataset=ds["test"],
+)
+
+# 🚀 Launch training
+trainer.train()
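train_speech.py evaluates every epoch but reports only the loss. A minimal accuracy metric that could be passed to the Trainer above; compute_metrics is an assumption, not part of the committed script:

import numpy as np

def compute_metrics(eval_pred):
    # eval_pred is (logits, labels) as produced by the Trainer's evaluation loop
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": float((predictions == labels).mean())}

# Hypothetical usage: Trainer(..., compute_metrics=compute_metrics)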
utils.py
DELETED
@@ -1,4 +0,0 @@
-import streamlit as st
-import datetime
-
-
{src/utils → utils}/__init__.py
RENAMED
File without changes
{src/utils → utils}/dataset.py
RENAMED
@@ -1,6 +1,6 @@
 import os
 from datasets import Dataset
-from
+from config import LABELS
 import pandas as pd
 
 def load_audio_data(data_dir):
{src/utils → utils}/preprocessing.py
RENAMED
@@ -3,8 +3,8 @@ import soundfile as sf
 import torch
 import torchaudio
 import numpy as np
-from
-from
+from model.feature_extractor import processor  # type: ignore
+from config import DEVICE
 
 # Resampler to convert to 16 kHz
 resampler = torchaudio.transforms.Resample(orig_freq=48_000, new_freq=16_000)
views/about.py
DELETED
@@ -1,21 +0,0 @@
-import streamlit as st
-
-
-def about():
-    st.title("About")
-
-    col1, col2 = st.columns(2)
-
-    with col1:
-        st.markdown("### About")
-        st.write("This dashboard is maintained by the M2 SISE team.")
-        st.write("For more information, please visit the [GitHub repository](https://github.com/jdalfons/sise-ultimate-challenge/tree/main).")
-
-    with col2:
-        st.markdown("### Collaborators")
-        st.write("""
-        - [Falonne Kpamegan](https://github.com/marinaKpamegan)
-        - [Nancy Randriamiarijaona](https://github.com/yminanc)
-        - [Cyril Kocab](https://github.com/Cyr-CK)
-        - [Juan Alfonso](https://github.com/jdalfons)
-        """)
views/emotion_analysis.py
DELETED
@@ -1,150 +0,0 @@
-import streamlit as st
-import pandas as pd
-import os
-import matplotlib.pyplot as plt
-import librosa
-from src.predict import predict_emotion
-
-DIRECTORY = "audios"
-FILE_NAME = "audio.wav"
-RATE = 16000
-
-def emotion_analysis():
-
-    st.header("❤️ Emotion Analysis")
-
-    if st.session_state.audio_file is None:
-        st.info("Please, upload or record an audio file in the studio tab")
-        st.stop()
-    else:
-        audio_file = st.session_state.audio_file
-
-    start_inference = st.button("Start emotion recogniton","inf_on_upl_btn")
-    emotion_labels = ["colere", "neutre", "joie"]
-    colors = ['#f71c1c', '#cac8c8', '#f6d60a']
-
-    if start_inference:
-        # Streamlit configuration
-        with st.spinner("Real-time emotion analysis..."):
-            # uploaded_file = st.file_uploader("Choisissez un fichier audio", type=["wav", "mp3"])
-
-            if audio_file is not None:
-                # Load and resample the audio
-                audio, sr = librosa.load(audio_file, sr=RATE)
-                # chunk = audio_file
-
-                # Sliding-window parameters
-                window_size = 1  # 1 second of data
-                hop_length = 0.5  # 0.5 seconds of overlap
-
-                # Create a real-time chart
-                fig, ax = plt.subplots()
-                lines = [ax.plot([], [], label=emotion)[0] for emotion in emotion_labels]
-                ax.set_ylim(0, 1)
-                ax.set_xlim(0, len(audio) / sr)
-                ax.set_xlabel("Temps (s)")
-                ax.set_ylabel("Probabilité")
-
-                chart = st.pyplot(fig)
-
-                scores = [[],[],[]]  # 3 emotions for now
-
-                # Sliding-window processing
-                for i in range(0, len(audio), int(hop_length * sr)):
-                    chunk = audio[i:i + int(window_size * sr)]
-                    if len(chunk) < int(window_size * sr):
-                        break
-
-                    emotion_scores = predict_emotion(chunk, output_probs=True, sampling_rate=RATE)
-
-                    # Update the chart
-                    for emotion, line in zip(emotion_labels, lines):
-                        xdata = list(line.get_xdata())
-                        ydata = list(line.get_ydata())
-                        colour = colors[list(emotion_scores).index(emotion)]
-                        xdata.append(i / sr)
-                        ydata.append(emotion_scores[emotion])
-                        scores[list(emotion_scores).index(emotion)].append(emotion_scores[emotion])
-                        line.set_data(xdata, ydata)
-                        line.set_color(colour)
-
-                    ax.relim()
-                    ax.autoscale_view()
-                    ax.legend()
-                    chart.pyplot(fig, use_container_width=True)
-
-                # Prepare the styling
-                st.markdown("""
-                <style>
-                .colored-box {
-                    padding: 10px;
-                    border-radius: 5px;
-                    color: white;
-                    font-weight: bold;
-                    text-align: center;
-                }
-                </style>
-                """
-                , unsafe_allow_html=True)
-
-                # Dynamically create the specified number of columns
-                columns = st.columns(len(emotion_scores))
-
-                # emotion_scores_mean = [sum(sublist) / len(sublist) for sublist in scores]
-                emotion_scores_mean = {emotion:sum(sublist) / len(sublist) for emotion, sublist in zip(emotion_labels, scores)}
-                max_emo = max(emotion_scores_mean)
-                emotion_scores_sorted = dict(sorted(emotion_scores_mean.items(), key=lambda x: x[1], reverse=True))
-                colors_sorted = [colors[list(emotion_scores_mean.keys()).index(key)] for key in list(emotion_scores_sorted.keys())]
-
-                # Add content to each column
-                for i, (col, emotion) in enumerate(zip(columns, emotion_scores_sorted)):
-                    color = colors_sorted[i % len(colors_sorted)]  # Cycle through colors if more columns than colors
-                    col.markdown(f"""
-                    <div class="colored-box" style="background-color: {color};">
-                        {emotion} : {100*emotion_scores_sorted[emotion]:.2f} %
-                    </div>
-                    """
-                    , unsafe_allow_html=True)
-
-
-
-                st.success("Analyse terminée !")
-            else:
-                st.warning("You need to load an audio file !")
-
-    if start_inference:
-
-        st.subheader("Feedback")
-
-        # Initialize the CSV file
-        csv_file = os.path.join("src","predictions","feedback.csv")
-
-        # Check whether the CSV file exists, otherwise create it with the appropriate columns
-        if not os.path.exists(csv_file):
-            df = pd.DataFrame(columns=["filepath", "prediction", "feedback"])
-            df.to_csv(csv_file, index=False)
-
-        # Load the existing data from the CSV
-        df = pd.read_csv(csv_file)
-
-        with st.form("feedback_form"):
-            st.write("What should have been the correct prediction ? (*Choose the same emotion if the prediction was correct*).")
-            feedback = st.selectbox("Your answer :", ['Sadness','Anger', 'Disgust', 'Fear', 'Surprise', 'Joy', 'Neutral'])
-            submit_button = st.form_submit_button("Submit")
-            st.write("En cliquant sur ce bouton, vous acceptez que votre audio soit sauvegardé dans notre base de données.")
-
-        if submit_button:
-            # Add the feedback to the DataFrame
-            new_entry = pd.DataFrame([{"filepath": audio_file.name, "prediction": max_emo, "feedback": feedback}])
-            # df = df.append(new_entry, ignore_index=True)
-            df = pd.concat([df, new_entry], ignore_index=True)
-
-            # Save the updated data to the CSV file
-            df.to_csv(csv_file, index=False)
-
-            # Save the audio file
-            with open(os.path.join("src","predictions","data",audio_file.name), "wb") as f:
-                f.write(audio_file.getbuffer())
-
-            # Confirmation for the user
-            st.success("Merci pour votre retour ! Vos données ont été sauvegardées.")
views/real_time.py
DELETED
@@ -1,327 +0,0 @@
-################################
-### Real time prediction for real time record
-###############################
-
-import streamlit as st
-import pyaudio
-import wave
-import torch
-from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
-import matplotlib.pyplot as plt
-import numpy as np
-import time
-
-# Audio parameters
-CHUNK = 1024
-FORMAT = pyaudio.paInt16
-CHANNELS = 1
-RATE = 16000
-
-# Streamlit interface
-st.title("Détection des émotions en temps réel")
-
-# Buttons to start and stop the recording
-start_button = st.button("Démarrer l'enregistrement")
-stop_button = st.button("Arrêter l'enregistrement")
-
-# Area for displaying emotions in real time
-emotion_placeholder = st.empty()
-final_emotion_placeholder = st.empty()
-
-if start_button:
-    st.write("Enregistrement en cours...")
-    audio = pyaudio.PyAudio()
-    stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
-
-    frames = []
-    real_time_emotions = []
-
-    while not stop_button:
-        data = stream.read(CHUNK)
-        frames.append(data)
-
-        # Real-time processing (in 1-second slices)
-        if len(frames) >= RATE // CHUNK:
-            audio_segment = np.frombuffer(b''.join(frames[-(RATE // CHUNK):]), dtype=np.int16)
-            emotion = predict_emotion(audio_segment, output_probs=False, sampling_rate=RATE)
-            real_time_emotions.append(emotion)
-            emotion_placeholder.line_chart(real_time_emotions)  # Plot the emotions over time
-
-    # Stop the recording
-    stream.stop_stream()
-    stream.close()
-    audio.terminate()
-
-    # Save the recorded audio
-    wf = wave.open("output.wav", "wb")
-    wf.setnchannels(CHANNELS)
-    wf.setsampwidth(audio.get_sample_size(FORMAT))
-    wf.setframerate(RATE)
-    wf.writeframes(b"".join(frames))
-    wf.close()
-
-    # Final prediction on the whole recorded audio
-    full_audio_data = np.frombuffer(b''.join(frames), dtype=np.int16)
-    final_emotion = predict_emotion(full_audio_data)
-
-    final_emotion_placeholder.write(f"Émotion finale prédite : {final_emotion}")
-
-
-################################
-### Real time prediction for uploaded audio file
-###############################
-# Load the wav2vec model and the processor
-
-# # Configuration Streamlit
-# st.title("Analyse des émotions en temps réel")
-# uploaded_file = st.file_uploader("Choisissez un fichier audio", type=["wav", "mp3"])
-
-# if uploaded_file is not None:
-#     # Charger et rééchantillonner l'audio
-#     audio, sr = librosa.load(uploaded_file, sr=16000)
-
-#     # Paramètres de la fenêtre glissante
-#     window_size = 1 # en secondes
-#     hop_length = 0.5 # en secondes
-
-#     # Créer un graphique en temps réel
-#     fig, ax = plt.subplots()
-#     lines = [ax.plot([], [], label=emotion)[0] for emotion in emotions]
-#     ax.set_ylim(0, 1)
-#     ax.set_xlim(0, len(audio) / sr)
-#     ax.set_xlabel("Temps (s)")
-#     ax.set_ylabel("Probabilité")
-#     ax.legend()
-
-#     chart = st.pyplot(fig)
-
-#     # Traitement par fenêtre glissante
-#     for i in range(0, len(audio), int(hop_length * sr)):
-#         chunk = audio[i:i + int(window_size * sr)]
-#         if len(chunk) < int(window_size * sr):
-#             break
-
-#         emotion_scores = predict_emotion(chunk, output_probs=False, sampling_rate=RATE)
-
-#         # Mettre à jour le graphique
-#         for emotion, line in zip(emotions, lines):
-#             xdata = line.get_xdata().tolist()
-#             ydata = line.get_ydata().tolist()
-#             xdata.append(i / sr)
-#             ydata.append(emotion_scores[emotion])
-#             line.set_data(xdata, ydata)
-
-#         ax.relim()
-#         ax.autoscale_view()
-#         chart.pyplot(fig)
-
-#     st.success("Analyse terminée !")
-
-
-
-
-
-
-
-############################################
-### Progress bar
-############################################
-
-with st.status("Downloading data...", expanded=True) as status:
-    st.write("Searching for data...")
-    time.sleep(2)
-    st.write("Found URL.")
-    time.sleep(1)
-    st.write("Downloading data...")
-    time.sleep(1)
-    status.update(
-        label="Download complete!", state="complete", expanded=False
-    )
-
-st.button("Rerun")
-
-
-############################################
-### Time duration estimation
-############################################
-progress_bar = st.progress(0)
-time_placeholder = st.empty()
-
-total_time = 10  # Total estimated time in seconds
-for i in range(total_time):
-    # Update progress bar
-    progress_bar.progress((i + 1) / total_time)
-
-    # Update time estimation
-    remaining_time = total_time - i - 1
-    time_placeholder.text(f"Estimated time remaining: {remaining_time} seconds")
-
-    # Simulate task progress
-    time.sleep(1)
-
-
-
-############################################
-### Audio file noise reduction
-############################################
-from pydub import AudioSegment
-import noisereduce as nr
-from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
-
-# Noise-reduction function
-def reduce_noise(audio_data, sr):
-    reduced_noise = nr.reduce_noise(y=audio_data, sr=sr)
-    return reduced_noise
-
-# Load the wav2vec model
-processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
-model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
-
-# Streamlit interface
-st.title("Application de transcription audio avec réduction de bruit")
-
-uploaded_file = st.file_uploader("Choisissez un fichier audio .wav", type="wav")
-
-if uploaded_file is not None:
-    # Load and preprocess the audio
-    audio = AudioSegment.from_wav(uploaded_file)
-    audio_array = np.array(audio.get_array_of_samples())
-
-    # Noise reduction
-    reduced_noise_audio = reduce_noise(audio_array, audio.frame_rate)
-
-    # Processing with wav2vec
-    input_values = processor(reduced_noise_audio, sampling_rate=audio.frame_rate, return_tensors="pt").input_values
-
-    with torch.no_grad():
-        logits = model(input_values).logits
-
-    predicted_ids = torch.argmax(logits, dim=-1)
-    transcription = processor.batch_decode(predicted_ids)[0]
-
-    st.audio(uploaded_file, format="audio/wav")
-    st.write("Transcription:")
-    st.write(transcription)
-
-
-############################################
-### Emotion selection
-############################################
-# options = ['Sadness','Anger', 'Disgust', 'Fear', 'Surprise', 'Joy','Neutral']
-# selected_options = st.multiselect('What emotions do you want to be displayed', options, default=['Joy', 'Anger','Neutral])
-
-
-############################################
-### Transcription Speech2Text
-############################################
-# # Fonction pour transcrire l'audio
-# def transcribe_audio(audio):
-#     # Préparer les données d'entrée pour le modèle
-#     input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
-
-#     # Passer les données dans le modèle pour obtenir les logits
-#     with torch.no_grad():
-#         logits = model(input_values).logits
-
-#     # Décoder les prédictions en texte
-#     predicted_ids = torch.argmax(logits, dim=-1)
-#     transcription = processor.batch_decode(predicted_ids)[0]
-#     return transcription
-
-# # Charger et transcrire l'audio
-# # audio, rate = load_audio(audio_file_path) # (re)chargement de l'audio si nécessaire
-# transcription = transcribe_audio(audio)
-
-# # Afficher la transcription
-# print("Transcription :", transcription)
-
-
-############################################
-### Feedback
-############################################
-import pandas as pd
-import os
-
-# Initialize the CSV file
-csv_file = "predictions/feedback.csv"
-
-# Check whether the CSV file exists, otherwise create it with the appropriate columns
-if not os.path.exists(csv_file):
-    df = pd.DataFrame(columns=["filepath", "prediction", "feedback"])
-    df.to_csv(csv_file, index=False)
-
-# Load the existing data from the CSV
-df = pd.read_csv(csv_file)
-
-# Streamlit interface
-st.title("Predicted emotion feedback")
-
-# Simulate a prediction for the example (replace with your real model)
-audio_file_name = "example_audio.wav"
-predicted_emotion = "Joie"  # Example prediction
-
-st.write(f"Fichier audio : {audio_file_name}")
-st.write(f"Émotion détectée : {predicted_emotion}")
-
-# Feedback form
-with st.form("feedback_form"):
-    st.write("Est-ce la bonne émotion qui a été détectée ? Cochez la réelle émotion.")
-    feedback = st.selectbox("Votre réponse :", ['Sadness','Anger', 'Disgust', 'Fear', 'Surprise', 'Joy', 'Neutral'])
-    submit_button = st.form_submit_button("Soumettre")
-    st.write("En cliquant sur ce bouton, vous acceptez que votre audio soit sauvegardé dans notre base de données.")
-
-if submit_button:
-    # Add the feedback to the DataFrame
-    new_entry = {"filepath": audio_file_name, "prediction": predicted_emotion, "feedback": feedback}
-    df = df.append(new_entry, ignore_index=True)
-
-    # Save the updated data to the CSV file
-    df.to_csv(csv_file, index=False)
-
-    # Save the audio file
-    with open("predictions/data", "wb") as f:
-        f.write(uploaded_file.getbuffer())
-
-    # Confirmation for the user
-    st.success("Merci pour votre retour ! Vos données ont été sauvegardées.")
-
-    # Display the saved data (optional)
-    # st.write("Données collectées jusqu'à présent :")
-    # st.dataframe(df)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-############################################
-### Predict proba (to replace in predict.py)
-############################################
-import librosa
-def predict_emotion_probabilities(audio_path):
-    waveform, _ = librosa.load(audio_path, sr=16000)
-    input_values = processor(waveform, return_tensors="pt", sampling_rate=16000).input_values
-    input_values = input_values.to(device)
-
-    with torch.no_grad():
-        outputs = model(input_values)
-
-    # Apply softmax to get probabilities
-    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
-
-    # Convert to a numpy array and take the first (and only) element
-    probabilities = probabilities[0].detach().cpu().numpy()
-
-    # Create a dictionary mapping each emotion to its probability
-    emotion_probabilities = {emotion: prob for emotion, prob in zip(emotion_labels, probabilities)}
-
-    return emotion_probabilities
views/studio.py
DELETED
@@ -1,176 +0,0 @@
-import streamlit as st
-from st_audiorec import st_audiorec
-
-from src.model.transcriber import transcribe_audio
-
-
-def studio():
-    st.title("SISE ultimate challenge")
-    st.write("C'est le dernier challenge de la formation SISE.")
-    st.markdown("""
-    **Overview:**
-    - Analyse de logs
-    - Analyse de données
-    - Machine learning
-    """)
-
-    st.markdown("---")
-
-    st.header("🎧 Audio File Studio")
-
-    tab1, tab2, tab3 = st.tabs(["⬆️ Record Audio", "🔈 Realtime Audio", "📝 Transcription"])
-
-    with tab1:
-        st.header("⬆️ Upload Audio Record")
-        st.write("Here you can upload a pre-recorded audio.")
-        audio_file = st.file_uploader("Upload an audio file", type=["wav"])
-
-        if "audio_file" not in st.session_state:
-            st.session_state.audio_file = None
-
-        if audio_file is not None:
-            st.success("Audio file uploaded successfully !")
-            st.session_state.audio_file = audio_file
-
-            # with open(os.path.join(DIRECTORY,FILE_NAME), "wb") as f:
-            #     f.write(audio_file.getbuffer())
-            # st.success(f"Saved file: {FILE_NAME}")
-
-
-
-    with tab2:
-        st.header("🔈 Realtime Audio Record")
-        st.write("Here you can record an audio.")
-
-        if "audio_file" not in st.session_state:
-            st.session_state.audio_file = None
-
-        audio_file = st_audiorec()
-
-        if audio_file is not None:
-            st.audio(audio_file, format='audio/wav')
-            st.success("Audio recorded successfully !")
-            st.session_state.audio_file = audio_file
-
-        ##############################################"realtime audio record"##############################################
-        # Buttons to start and stop the recording
-        # start_button = st.button("Démarrer l'enregistrement")
-        # stop_button = st.button("Arrêter l'enregistrement")
-        # start_stop = st.button("Démarrer/Arrêter l'enregistrement")
-
-
-        # Area for displaying emotions in real time
-        # emotion_placeholder = st.empty()
-        # final_emotion_placeholder = st.empty()
-        # audio = pyaudio.PyAudio()
-        # audio_buffer = np.array([])
-        # emotion_prediction = "Aucune prédiction"
-        # is_recording = False
-
-        # if start_stop:
-        #     is_recording = not is_recording
-
-        # # Variables globales pour le partage de données entre threads
-        # def audio_callback(in_data, frame_count, time_info, status):
-        #     global audio_buffer
-        #     audio_data = np.frombuffer(in_data, dtype=np.float32)
-        #     audio_buffer = np.concatenate((audio_buffer, audio_data))
-        #     return (in_data, pyaudio.paContinue)
-
-        # def predict_emotion_thread():
-        #     global audio_buffer, emotion_prediction
-        #     while is_recording:
-        #         if len(audio_buffer) >= CHUNK:
-        #             chunk = audio_buffer[:CHUNK]
-        #             audio_buffer = audio_buffer[STRIDE:]
-        #             emotion_prediction = predict_emotion(chunk, output_probs=False, sampling_rate=RATE) # Utilisez votre modèle ici
-        #             # time.sleep(0.1)
-
-        # if is_recording:
-        #     audio_buffer = np.array([])
-        #     stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True,
-        #                         frames_per_buffer=CHUNK, stream_callback=audio_callback)
-        #     stream.start_stream()
-        #     threading.Thread(target=predict_emotion_thread, daemon=True).start()
-        #     st.write("Enregistrement en cours...")
-        # else:
-        #     stream.stop_stream()
-        #     stream.close()
-        #     st.write("Enregistrement arrêté.")
-
-        # emotion_display = st.empty()
-
-        # while is_recording:
-        #     emotion_display.write(f"Émotion détectée : {emotion_prediction}")
-        #     # time.sleep(0.1)
-
-        # audio.terminate(
-
-
-        # stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
-
-        # frames = []
-        # real_time_emotions = []
-
-        # while not stop_button:
-        #     data = stream.read(CHUNK)
-        #     frames.append(data)
-
-        #     # Traitement en temps réel (par tranche de 1 seconde)
-        #     if len(frames) >= RATE // CHUNK:
-        #         audio_segment = np.frombuffer(b''.join(frames[-(RATE // CHUNK):]), dtype=np.int16)
-        #         emotion = predict_emotion(audio_segment, output_probs=False, sampling_rate=RATE)
-        #         real_time_emotions.append(emotion)
-        #         emotion_placeholder.line_chart(real_time_emotions) # Affichage graphique des émotions
-
-        #     # Arrêt de l'enregistrement
-        #     stream.stop_stream()
-        #     stream.close()
-        #     audio.terminate()
-
-        #     # Sauvegarde de l'audio enregistré
-        #     wf = wave.open("output.wav", "wb")
-        #     wf.setnchannels(CHANNELS)
-        #     wf.setsampwidth(audio.get_sample_size(FORMAT))
-        #     wf.setframerate(RATE)
-        #     wf.writeframes(b"".join(frames))
-        #     wf.close()
-
-        #     # Prédiction finale sur tout l'audio enregistré
-        #     full_audio_data = np.frombuffer(b''.join(frames), dtype=np.int16)
-        #     final_emotion = predict_emotion(full_audio_data)
-
-        #     final_emotion_placeholder.write(f"Émotion finale prédite : {final_emotion}")
-
-
-        ##############################################"end realtime audio record"##############################################
-
-    with tab3:
-        st.header("📝 Speech2Text Transcription")
-        st.write("Here you can get the audio transcript.")
-
-        save = st.checkbox("Save transcription to .txt", value=False, key="save-transcript")
-
-        ############################# To uncomment once this is debugged
-        if st.button("Transcribe", key="transcribe-button"):
-            # Function to transcribe the audio
-            transcription = transcribe_audio(st.audio)
-
-            # Load and transcribe the audio
-            # audio, rate = load_audio(audio_file_path) # (re)chargement de l'audio si nécessaire
-            transcription = transcribe_audio(audio_file, sampling_rate=16000)
-
-            # Display the transcription
-            st.write("Transcription :", transcription)
-
-            st.success("Audio registered successfully.")
-            if save:
-                file_path = "transcript.txt"
-
-                # Write the text to the file
-                with open(file_path, "w") as file:
-                    file.write(transcription)
-
-                st.success(f"Text saved to {file_path}")
-
-