diff --git a/.github/workflows/check_file_size.yml b/.github/workflows/check_file_size.yml
new file mode 100644
index 0000000000000000000000000000000000000000..20a13c3c025a89bd507f0497d7e2d34ee2742fb6
--- /dev/null
+++ b/.github/workflows/check_file_size.yml
@@ -0,0 +1,16 @@
+name: Check file size
+on:  # or directly `on: [push]` to run the action on every push on any branch
+  pull_request:
+    branches: [main]
+
+  # to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+jobs:
+  sync-to-hub:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check large files
+        uses: ActionsDesk/lfs-warning@v2.0
+        with:
+          filesizelimit: 10485760 # this is 10MB so we can sync to HF Spaces
\ No newline at end of file
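
Note: the workflow above enforces the 10485760-byte (10 MB) ceiling that Hugging Face Spaces applies to non-LFS files. A local pre-push check along the same lines might look like this (a sketch, not part of this PR; the walk root and the `.git` exclusion are assumptions):

```python
# Local pre-push check mirroring the CI limit above (sketch; paths are assumptions).
import os

LIMIT = 10_485_760  # 10 MB, the same value as filesizelimit in check_file_size.yml

def find_large_files(root="."):
    for dirpath, dirnames, filenames in os.walk(root):
        dirnames[:] = [d for d in dirnames if d != ".git"]  # skip git internals
        for name in filenames:
            path = os.path.join(dirpath, name)
            if os.path.getsize(path) > LIMIT:
                yield path, os.path.getsize(path)

if __name__ == "__main__":
    for path, size in find_large_files():
        print(f"{path}: {size / 1_048_576:.1f} MB exceeds the 10 MB Spaces limit")
```
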
diff --git a/.github/workflows/sync_hf.yml b/.github/workflows/sync_hf.yml
new file mode 100644
index 0000000000000000000000000000000000000000..063f09899ece8e9753eb213f16bd2b9ee7713286
--- /dev/null
+++ b/.github/workflows/sync_hf.yml
@@ -0,0 +1,20 @@
+name: Sync to Hugging Face hub
+on:
+  push:
+    branches: [main]
+
+  # to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+jobs:
+  sync-to-hub:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          lfs: true
+      - name: Push to hub
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: git push https://HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/jdalfonso/SISE-ULTIMATE-CHALLENGE main
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 40f05c36251b2bb642f91d3445bb55148dd70843..5cc2cc05e44c3e0ead61975d8e1eb754e7b2c82c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 __pycache__/
 *.py[cod]
 *$py.class
+.idea/
 
 # C extensions
 *.so
@@ -178,6 +179,10 @@ dataset/
 old/
 *.wav
 data/*
-
+*.pth
+old/
 # Mac
 .DS_Store
+.idea
+wav2vec2_emotion/
+dataset/
\ No newline at end of file
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..13566b81b018ad684f3a35fee301741b2734c8f4
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.streamlit/config.toml b/.streamlit/config.toml
index 06da5537aa94488368c7dd101a8c5851e225cf78..b0a08fdd37212d8ab1d96a3bd21e2855bffcc1c0 100644
--- a/.streamlit/config.toml
+++ b/.streamlit/config.toml
@@ -1,4 +1,4 @@
 [theme]
-base="dark"
+base="light"
 primaryColor="#7c99b4"
diff --git a/README.md b/README.md
index d82606784c325f8d01a7e77c5a27b46d3f4e4dc4..96844de47b1d3201f5a04473ab18ab18e4c21cad 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 # SISE Ultimate Challenge
-![Logo du Ultimate Challenge SISE](img/logo.png)
+![Logo du Ultimate Challenge SISE](img/logo_01.png)
 
 Ceci est le Ultimate Challenge pour le Master SISE.
diff --git a/app.py b/app.py
index 9d746377604f82bf788a2c4bc27ee9ef78949b64..bfe69bc84513eaa773411dbed0a82165876dfbb0 100644
--- a/app.py
+++ b/app.py
@@ -1,34 +1,248 @@
 import streamlit as st
-from streamlit_option_menu import option_menu
-from views.application import application
-from views.about import about
-
-# Set the logo
-st.sidebar.image("img/logo.png", use_container_width=True)
-
-# Create a sidebar with navigation options
-# Sidebar navigation with streamlit-option-menu
-with st.sidebar:
-    # st.image("img/logo.png", use_container_width=True)
-    # st.markdown("
-    #     SecureIA Dashboard
-    # ", unsafe_allow_html=True)
-    # Navigation menu with icons
-    selected_tab = option_menu(
-        menu_title=None,  # Added menu_title parameter
-        options=["Application", "About"],
-        icons=["robot", "bar-chart", "robot"],
-        menu_icon="cast",
-        default_index=0,
-        # styles={
-        #     "container": {"padding": "5px", "background-color": "#f0f2f6"},
-        #     "icon": {"color": "orange", "font-size": "18px"},
-        #     "nav-link": {"font-size": "16px", "text-align": "left", "margin": "0px", "color": "black"},
-        #     "nav-link-selected": {"background-color": "#4CAF50", "color": "white"},
-        # }
+import pandas as pd
+import numpy as np
+import os
+import time
+import matplotlib.pyplot as plt
+from datetime import datetime
+import tempfile
+import io
+import json
+from model.transcriber import transcribe_audio
+from predict import predict_emotion
+
+# You'll need to install this package:
+# pip install streamlit-audiorec
+from st_audiorec import st_audiorec
+
+# Page configuration
+st.set_page_config(
+    page_title="Emotion Analyser",
+    page_icon="🎤",
+    layout="wide"
+)
+
+# Initialize session state variables if they don't exist
+if 'audio_data' not in st.session_state:
+    st.session_state.audio_data = []
+if 'current_audio_index' not in st.session_state:
+    st.session_state.current_audio_index = -1
+if 'audio_history_csv' not in st.session_state:
+    # Define columns for our CSV storage
+    st.session_state.audio_history_csv = pd.DataFrame(
+        columns=['timestamp', 'file_path', 'transcription', 'emotion', 'probabilities']
+    )
+if 'needs_rerun' not in st.session_state:
+    st.session_state.needs_rerun = False
+
+# Function to ensure we keep only the last 10 entries
+def update_audio_history(new_entry):
+    # Add the new entry
+    st.session_state.audio_history_csv = pd.concat([st.session_state.audio_history_csv, pd.DataFrame([new_entry])], ignore_index=True)
+
+    # Keep only the last 10 entries
+    if len(st.session_state.audio_history_csv) > 10:
+        st.session_state.audio_history_csv = st.session_state.audio_history_csv.iloc[-10:]
+
+    # Save to CSV
+    st.session_state.audio_history_csv.to_csv('audio_history.csv', index=False)
+
+# Function to process audio and get results
+def process_audio(audio_path):
+    try:
+        # Get transcription
+        transcription = transcribe_audio(audio_path)
+
+        # Get emotion prediction
+        predicted_emotion, probabilities = predict_emotion(audio_path)
+
+        # Update audio history
+        new_entry = {
+            'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            'file_path': audio_path,
+            'transcription': transcription,
+            'emotion': predicted_emotion,
+            'probabilities': str(probabilities)  # Convert dict to string for storage
+        }
+        update_audio_history(new_entry)
+
+        # Update current index
+        st.session_state.current_audio_index = len(st.session_state.audio_history_csv) - 1
+
+        return transcription, predicted_emotion, probabilities
+    except Exception as e:
+        st.error(f"Error processing audio: {str(e)}")
+        return None, None, None
+
+# Function to split audio into 10-second segments
+def split_audio(audio_file, segment_length=10):
+    # This is a placeholder - in a real implementation, you'd use a library like pydub
+    # to split the audio file into segments
+    st.warning("Audio splitting functionality is a placeholder. Implement with pydub or similar library.")
+    # For now, we'll just return the whole file as a single segment
+    return [audio_file]
+
+# Function to display emotion visualization
+def display_emotion_chart(probabilities):
+    emotions = list(probabilities.keys())
+    values = list(probabilities.values())
+
+    fig, ax = plt.subplots(figsize=(10, 5))
+    bars = ax.bar(emotions, values, color=['red', 'gray', 'green'])
+
+    # Add data labels on top of bars
+    for bar in bars:
+        height = bar.get_height()
+        ax.text(bar.get_x() + bar.get_width()/2., height + 0.02,
+                f'{height:.2f}', ha='center', va='bottom')
+
+    ax.set_ylim(0, 1.1)
+    ax.set_ylabel('Probability')
+    ax.set_title('Emotion Prediction Results')
+
+    st.pyplot(fig)
+
+# Trigger rerun if needed (replaces experimental_rerun)
+if st.session_state.needs_rerun:
+    st.session_state.needs_rerun = False
+    st.rerun()  # Using st.rerun() instead of experimental_rerun
+
+# Main App Layout
+st.image("./img/logo_01.png", width=400)
+
+# Create two columns for the main layout
+col1, col2 = st.columns([1, 1])
+
+with col1:
+    st.header("Audio Input")
+
+    # Method selection
+
+    tab1, tab2 = st.tabs(["Record Audio", "Upload Audio"])
+
+    with tab1:
+        st.write("Record your audio (max 10 seconds):")
+
+        # Using streamlit-audiorec for better recording functionality
+        wav_audio_data = st_audiorec()
+
+        if wav_audio_data is not None:
+            # Save the recorded audio to a temporary file
+            with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
+                tmp_file.write(wav_audio_data)
+                tmp_file_path = tmp_file.name
+
+            st.success("Audio recorded successfully!")
+
+            # Process button
+            if st.button("Process Recorded Audio"):
+                # Process the audio
+                with st.spinner("Processing audio..."):
+                    transcription, emotion, probs = process_audio(tmp_file_path)
+                # Set flag for rerun instead of calling experimental_rerun
+                if transcription is not None:
+                    st.success("Audio processed successfully!")
+                    st.session_state.needs_rerun = True
+
+    with tab2:
+        uploaded_file = st.file_uploader("Upload an audio file (WAV format)", type=['wav'])
+
+        if uploaded_file is not None:
+            # Save the uploaded file to a temporary location
+            with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
+                tmp_file.write(uploaded_file.getbuffer())
+                tmp_file_path = tmp_file.name
+
+            st.audio(uploaded_file, format="audio/wav")
+
+            # Process button
+            if st.button("Process Uploaded Audio"):
+                # Split audio into 10-second segments
+                with st.spinner("Processing audio..."):
+                    segments = split_audio(tmp_file_path)
+
+                    # Process each segment
+                    for i, segment_path in enumerate(segments):
+                        st.write(f"Processing segment {i+1}...")
+                        transcription, emotion, probs = process_audio(segment_path)
+
+                # Set flag for rerun instead of calling experimental_rerun
+                st.success("Audio processed successfully!")
+                st.session_state.needs_rerun = True
+
+with col2:
+    st.header("Results")
+
+    # Display results if available
+    if st.session_state.current_audio_index >= 0 and len(st.session_state.audio_history_csv) > 0:
+        current_data = st.session_state.audio_history_csv.iloc[st.session_state.current_audio_index]
+
+        # Transcription
+        st.subheader("Transcription")
+        st.text_area("", value=current_data['transcription'], height=100, key="transcription_area")
+
+        # Emotion
+        st.subheader("Detected Emotion")
+        st.info(f"🎭 Predicted emotion: **{current_data['emotion']}**")
+
+        # Convert string representation of dict back to actual dict
+        try:
+            import ast
+            probs = ast.literal_eval(current_data['probabilities'])
+            display_emotion_chart(probs)
+        except Exception as e:
+            st.error(f"Error parsing probabilities: {str(e)}")
+            st.write(f"Raw probabilities: {current_data['probabilities']}")
+    else:
+        st.info("Record or upload audio to see results")
+
+# Audio History and Analytics Section
+st.header("Audio History and Analytics")
+
+if len(st.session_state.audio_history_csv) > 0:
+    # Display a select box to choose from audio history
+    timestamps = st.session_state.audio_history_csv['timestamp'].tolist()
+    selected_timestamp = st.selectbox(
+        "Select audio from history:",
+        options=timestamps,
+        index=len(timestamps) - 1  # Default to most recent
     )
+    # Update current index when selection changes
+    selected_index = st.session_state.audio_history_csv[
+        st.session_state.audio_history_csv['timestamp'] == selected_timestamp
+    ].index[0]
+
+    # Only update if different
+    if st.session_state.current_audio_index != selected_index:
+        st.session_state.current_audio_index = selected_index
+        st.session_state.needs_rerun = True
-if selected_tab == "Application":
-    application()
-elif selected_tab == "About":
-    about()
-    
\ No newline at end of file
+    # Analytics button
+    if st.button("Run Analytics on Selected Audio"):
+        st.subheader("Analytics Results")
+
+        # Get the selected audio data
+        selected_data = st.session_state.audio_history_csv.iloc[selected_index]
+
+        # Display analytics (this is where you would add more sophisticated analytics)
+        st.write(f"Selected Audio: {selected_data['timestamp']}")
+        st.write(f"Emotion: {selected_data['emotion']}")
+        st.write(f"File Path: {selected_data['file_path']}")
+
+        # Add any additional analytics you want here
+
+        # Try to play the selected audio
+        try:
+            if os.path.exists(selected_data['file_path']):
+                st.audio(selected_data['file_path'], format="audio/wav")
+            else:
+                st.warning("Audio file not found - it may have been deleted or moved.")
+        except Exception as e:
+            st.error(f"Error playing audio: {str(e)}")
+else:
+    st.info("No audio history available. Record or upload audio to create history.")
+
+# Footer
+st.markdown("---")
+st.caption("Audio Emotion Analyzer - Processes audio in 10-second segments and predicts emotions")
\ No newline at end of file
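
Note: `split_audio` in app.py above is explicitly a placeholder. A pydub-based implementation could look like this (a sketch, not committed code; the temporary-file naming is an assumption, and pydub needs ffmpeg available):

```python
# Sketch of the 10-second splitter that app.py's placeholder describes
# (assumes pydub is installed; output paths in the temp dir are illustrative).
from pydub import AudioSegment
import os
import tempfile

def split_audio(audio_file, segment_length=10):
    audio = AudioSegment.from_wav(audio_file)
    segment_ms = segment_length * 1000  # pydub slices by milliseconds
    segment_paths = []
    for i, start in enumerate(range(0, len(audio), segment_ms)):
        chunk = audio[start:start + segment_ms]
        out_path = os.path.join(tempfile.gettempdir(), f"segment_{i}.wav")
        chunk.export(out_path, format="wav")
        segment_paths.append(out_path)
    return segment_paths
```
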
diff --git a/config.py b/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..c7dbc9d5715b93eb930cfe7a5f05d49001a37c3a
--- /dev/null
+++ b/config.py
@@ -0,0 +1,25 @@
+import os
+import torch
+from dotenv import load_dotenv
+
+# Charger les variables d'environnement
+load_dotenv()
+HF_API_KEY = os.getenv("HF_API_KEY")
+
+if not HF_API_KEY:
+    raise ValueError("Le token Hugging Face n'a pas été trouvé dans .env")
+
+# Labels d'émotions
+LABELS = {"colere": 0, "neutre": 1, "joie": 2}
+#LABELS = ["colere", "neutre", "joie"]
+NUM_LABELS = len(LABELS)
+
+# Choisir le device
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+# Modèle Wav2Vec2
+MODEL_NAME = "facebook/wav2vec2-large-xlsr-53-french"
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+BEST_MODEL_NAME = os.path.join(BASE_DIR, "model","fr-speech-emotion-model.pth")
+
diff --git a/img/logo.png b/img/logo.png
deleted file mode 100644
index 100b4caf051527c0ee146eaa1be558db1ff3944e..0000000000000000000000000000000000000000
Binary files a/img/logo.png and /dev/null differ
diff --git a/img/logo_01.png b/img/logo_01.png
new file mode 100644
index 0000000000000000000000000000000000000000..8d86f28750b420dc7741942b36a0e74d9603498f
Binary files /dev/null and b/img/logo_01.png differ
diff --git a/model/__init__.py b/model/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/model/emotion_classifier.py b/model/emotion_classifier.py
new file mode 100644
index 0000000000000000000000000000000000000000..58d48b9af917165bc6c77f591d91489beff6c17f
--- /dev/null
+++ b/model/emotion_classifier.py
@@ -0,0 +1,54 @@
+
+
+# Prédit 33% environ partout (dans le cas 3 classes)
+
+# class EmotionClassifier(nn.Module):
+#     def __init__(self, feature_dim, num_labels):
+#         super(EmotionClassifier, self).__init__()
+#         self.fc1 = nn.Linear(feature_dim, 256)
+#         self.relu = nn.ReLU()
+#         self.dropout = nn.Dropout(0.3)
+#         self.fc2 = nn.Linear(256, num_labels)
+
+#     def forward(self, x):
+#         x = self.fc1(x)
+#         x = self.relu(x)
+#         x = self.dropout(x)
+#         return self.fc2(x)
+
+
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class Attention(nn.Module):
+    """Mécanisme d’attention permettant de pondérer l’importance des caractéristiques audio"""
+    def __init__(self, hidden_dim):
+        super(Attention, self).__init__()
+        self.attention_weights = nn.Linear(hidden_dim, 1)
+
+    def forward(self, lstm_output):
+        # lstm_output: (batch_size, sequence_length, hidden_dim)
+        attention_scores = self.attention_weights(lstm_output)  # (batch_size, sequence_length, 1)
+        attention_weights = torch.softmax(attention_scores, dim=1)  # Normalisation softmax
+        weighted_output = lstm_output * attention_weights  # Pondération des features
+        return weighted_output.sum(dim=1)  # Somme pondérée sur la séquence
+
+class EmotionClassifier(nn.Module):
+    """Modèle de classification des émotions basé sur BiLSTM et attention"""
+    def __init__(self, feature_dim, num_labels, hidden_dim=128):
+        super(EmotionClassifier, self).__init__()
+        self.lstm = nn.LSTM(feature_dim, hidden_dim, batch_first=True, bidirectional=True)
+        self.attention = Attention(hidden_dim * 2)  # Bidirectionnel → hidden_dim * 2
+        self.fc = nn.Linear(hidden_dim * 2, num_labels)  # Couche de classification finale
+
+    def forward(self, x):
+        lstm_out, _ = self.lstm(x)  # (batch_size, sequence_length, hidden_dim*2)
+        attention_out = self.attention(lstm_out)  # (batch_size, hidden_dim*2)
+        logits = self.fc(attention_out)  # (batch_size, num_labels)
+        return logits
+
+
+
+
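
Note: a quick way to sanity-check the tensor shapes of the BiLSTM + attention classifier above (a sketch; the batch size of 8 and sequence length of 128 are arbitrary, with feature_dim=40 matching the MFCC setup used in predict.py below):

```python
# Shape smoke test for EmotionClassifier (sketch; batch/sequence sizes are arbitrary).
import torch
from model.emotion_classifier import EmotionClassifier

model = EmotionClassifier(feature_dim=40, num_labels=3)  # 40 MFCCs, 3 emotions
x = torch.randn(8, 128, 40)  # (batch, frames, MFCC coefficients)
logits = model(x)
print(logits.shape)  # torch.Size([8, 3])
```
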
diff --git a/model/feature_extractor.py b/model/feature_extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..13d0b60995bdf465611a6b792b5b455e4e05c8e2
--- /dev/null
+++ b/model/feature_extractor.py
@@ -0,0 +1,6 @@
+import torch
+from transformers import Wav2Vec2Model, Wav2Vec2Processor
+from config import MODEL_NAME, DEVICE
+
+processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
+feature_extractor = Wav2Vec2Model.from_pretrained(MODEL_NAME).to(DEVICE)
diff --git a/model/transcriber.py b/model/transcriber.py
new file mode 100644
index 0000000000000000000000000000000000000000..90ca6d9f27690c6285016d06cbc60dbd9d08fc00
--- /dev/null
+++ b/model/transcriber.py
@@ -0,0 +1,35 @@
+import os
+import torch
+import librosa
+from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
+
+# Charger le modèle et le processeur
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+MODEL_NAME = "facebook/wav2vec2-large-xlsr-53-french"
+
+processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
+model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME).to(device)
+model.eval()
+
+def transcribe_audio(audio_path, sampling_rate=16000):
+    # Charger l'audio
+    audio, sr = librosa.load(audio_path, sr=sampling_rate)
+
+    # Transformer l'audio en entrée pour le modèle
+    input_values = processor(audio, sampling_rate=sampling_rate, return_tensors="pt").input_values.to(device)
+
+    # Obtenir les prédictions
+    with torch.no_grad():
+        logits = model(input_values).logits
+
+    # Décoder les prédictions en texte
+    predicted_ids = torch.argmax(logits, dim=-1)
+    transcription = processor.batch_decode(predicted_ids)[0]
+    return transcription
+
+# Exemple d'utilisation
+if __name__ == "__main__":
+    base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data"))
+    audio_path = os.path.join(base_path, "colere", "c1af.wav")
+    texte = transcribe_audio(audio_path)
+    print(f"Transcription : {texte}")
diff --git a/predict.py b/predict.py
new file mode 100644
index 0000000000000000000000000000000000000000..87681dc371d665d197040fc4d033ebf69d68e66b
--- /dev/null
+++ b/predict.py
@@ -0,0 +1,57 @@
+import sys
+import os
+import torch
+import librosa
+import numpy as np
+from model.emotion_classifier import EmotionClassifier
+from utils.preprocessing import collate_fn
+from config import DEVICE, NUM_LABELS, BEST_MODEL_NAME
+
+# Charger le modèle entraîné
+feature_dim = 40  # Nombre de MFCCs utilisés
+model = EmotionClassifier(feature_dim, NUM_LABELS).to(DEVICE)
+model.load_state_dict(torch.load(BEST_MODEL_NAME, map_location=DEVICE))
+model.eval()  # Mode évaluation
+
+# Labels des émotions
+LABELS = {0: "colère", 1: "neutre", 2: "joie"}
+
+# Fonction pour prédire l’émotion d’un fichier audio avec probabilités
+def predict_emotion(audio_path, max_length=128):
+    # Charger l’audio
+    y, sr = librosa.load(audio_path, sr=16000)
+
+    # Extraire les MFCCs
+    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
+
+    # Ajuster la taille des MFCCs avec padding/troncature
+    if mfcc.shape[1] > max_length:
+        mfcc = mfcc[:, :max_length]  # Tronquer si trop long
+    else:
+        pad_width = max_length - mfcc.shape[1]
+        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
+
+    # Convertir en tenseur PyTorch
+    input_tensor = torch.tensor(mfcc.T, dtype=torch.float32).unsqueeze(0).to(DEVICE)  # (1, max_length, 40)
+
+    # Prédiction avec le modèle
+    with torch.no_grad():
+        logits = model(input_tensor)
+        probabilities = torch.nn.functional.softmax(logits, dim=-1).cpu().numpy().flatten()  # Convertir en probabilités
+        predicted_class = torch.argmax(logits, dim=-1).item()
+
+    # Associer les probabilités aux labels
+    probabilities_dict = {LABELS[i]: float(probabilities[i]) for i in range(NUM_LABELS)}
+
+    return LABELS[predicted_class], probabilities_dict
+
+
+# Exemple d'utilisation
+if __name__ == "__main__":
+    base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
+    audio_file = os.path.join(base_path, "colere", "c1ac.wav")
+
+    predicted_emotion, probabilities = predict_emotion(audio_file)
+
+    print(f"🎤 L'émotion prédite est : {predicted_emotion}")
+    print(f"📊 Probabilités par classe : {probabilities}")
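
Note: predict.py above scores one file at a time; a small driver like the following could batch over the data/&lt;label&gt;/*.wav layout that its `__main__` example assumes (a sketch, not part of this PR):

```python
# Batch-scoring sketch built on predict.py's predict_emotion
# (assumes the data/<label>/*.wav layout used in the examples above).
import os
from predict import predict_emotion

def predict_folder(root="data"):
    results = {}
    for dirpath, _, filenames in os.walk(root):
        for name in sorted(filenames):
            if name.endswith(".wav"):
                path = os.path.join(dirpath, name)
                emotion, probs = predict_emotion(path)
                results[path] = (emotion, probs)
                print(f"{path}: {emotion} ({probs})")
    return results

if __name__ == "__main__":
    predict_folder()
```
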
diff --git a/requirements.txt b/requirements.txt
index df8bcaed5d776ee60fc8dfaf7e08a498883111e1..8e547a632e4c99d77df9f362258617ec7d171234 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,3 +15,5 @@ scikit-learn
 huggingface
 huggingface_hub
 pyaudio
+streamlit_audiorec
+python-dotenv
\ No newline at end of file
diff --git a/src/data/colere/c1ac.wav b/src/data/colere/c1ac.wav
deleted file mode 100644
index de0b8dd9019ccc951cd8629881ce4ca8d25b3ec0..0000000000000000000000000000000000000000
Binary files a/src/data/colere/c1ac.wav and /dev/null differ
diff --git a/src/data/colere/c1af.wav b/src/data/colere/c1af.wav
deleted file mode 100644
index e1967a9476d751deeee0829c8243ad75926e2925..0000000000000000000000000000000000000000
Binary files a/src/data/colere/c1af.wav and /dev/null differ
diff --git a/src/data/colere/c1aj.wav b/src/data/colere/c1aj.wav
deleted file mode 100644
index c09590a452eb4b644898fa81fd7eef622cc154a3..0000000000000000000000000000000000000000
Binary files a/src/data/colere/c1aj.wav and /dev/null differ
diff --git a/src/data/colere/c1an.wav b/src/data/colere/c1an.wav
deleted file mode 100644
index 06686c906533609dba6f3f6e1d55027914c67d46..0000000000000000000000000000000000000000
Binary files a/src/data/colere/c1an.wav and /dev/null differ
diff --git a/src/data/colere/c1bc.wav b/src/data/colere/c1bc.wav
deleted file mode 100644
index 4c44ff67816c74daeea7c5fda572bb3337101bf0..0000000000000000000000000000000000000000
Binary files a/src/data/colere/c1bc.wav and /dev/null differ
diff --git a/src/data/colere/c1bf.wav b/src/data/colere/c1bf.wav
deleted file mode 100644
index 0bf087d6b04b648c2ac2a11c02a6449e5de39b06..0000000000000000000000000000000000000000
Binary files a/src/data/colere/c1bf.wav and /dev/null differ
diff --git a/src/data/colere/c1bj.wav b/src/data/colere/c1bj.wav
deleted file mode 100644
index 952b13d3c04eba087665f3dedfcbc36c20ecf6f7..0000000000000000000000000000000000000000
Binary files a/src/data/colere/c1bj.wav and /dev/null differ
diff --git a/src/data/colere/c1bn.wav b/src/data/colere/c1bn.wav
deleted file mode 100644
index 33ec132651db3818265b96205f6a278e672b876a..0000000000000000000000000000000000000000
Binary files a/src/data/colere/c1bn.wav and /dev/null differ
diff --git a/src/data/colere/c1cc.wav b/src/data/colere/c1cc.wav
deleted file mode 100644
index 8ff50bc7acb263721982227aa3b5232311e7755f..0000000000000000000000000000000000000000
Binary files a/src/data/colere/c1cc.wav and /dev/null differ
diff --git a/src/data/colere/c1cf.wav b/src/data/colere/c1cf.wav
deleted file mode 100644
index
e4d6d8020307b58a16a8d21702929ff83250431b..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c1cf.wav and /dev/null differ diff --git a/src/data/colere/c1cj.wav b/src/data/colere/c1cj.wav deleted file mode 100644 index 06aa30452f84fc6802156817c6916352fab369a3..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c1cj.wav and /dev/null differ diff --git a/src/data/colere/c2ac.wav b/src/data/colere/c2ac.wav deleted file mode 100644 index 524dca5257d009affa31a3114ec9e6f8ab2ad59d..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c2ac.wav and /dev/null differ diff --git a/src/data/colere/c2af.wav b/src/data/colere/c2af.wav deleted file mode 100644 index 6560815da0edef8f5dafab89a79c257630a2ee8c..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c2af.wav and /dev/null differ diff --git a/src/data/colere/c2aj.wav b/src/data/colere/c2aj.wav deleted file mode 100644 index 8b3072eedb4811913f970f3560c15f1f7b6dc606..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c2aj.wav and /dev/null differ diff --git a/src/data/colere/c2an.wav b/src/data/colere/c2an.wav deleted file mode 100644 index a6cf62bcc41f3b0b14d21a798f97eb852a44d77b..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c2an.wav and /dev/null differ diff --git a/src/data/colere/c2bc.wav b/src/data/colere/c2bc.wav deleted file mode 100644 index d1404728c8d82cfa448cb473334f4299f85186ef..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c2bc.wav and /dev/null differ diff --git a/src/data/colere/c2bf.wav b/src/data/colere/c2bf.wav deleted file mode 100644 index 7916f8445a7e7e9c5e788d5086482b98c2de2a3b..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c2bf.wav and /dev/null differ diff --git a/src/data/colere/c2bj.wav b/src/data/colere/c2bj.wav deleted file mode 100644 index e75a7815503da5795e0c2ec0f0e7a38f4a4a7726..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c2bj.wav and /dev/null differ diff --git a/src/data/colere/c2bn.wav b/src/data/colere/c2bn.wav deleted file mode 100644 index 3572ffb0f9ee723c468d64c0560d9fa46c9e7453..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c2bn.wav and /dev/null differ diff --git a/src/data/colere/c2cn.wav b/src/data/colere/c2cn.wav deleted file mode 100644 index c5669792e40ab64ce51fd7552875a239f5aa0d21..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c2cn.wav and /dev/null differ diff --git a/src/data/colere/c3ac.wav b/src/data/colere/c3ac.wav deleted file mode 100644 index fdbbafaddb9b7f7b5b26c254503d87efd1907143..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c3ac.wav and /dev/null differ diff --git a/src/data/colere/c3af.wav b/src/data/colere/c3af.wav deleted file mode 100644 index c910e20f1768dd107f8f64d9e639b3a0335b5c4f..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c3af.wav and /dev/null differ diff --git a/src/data/colere/c3aj.wav b/src/data/colere/c3aj.wav deleted file mode 100644 index 2690efc1d43fb0d5e375f27612b23b877af5d52c..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c3aj.wav and /dev/null differ diff --git a/src/data/colere/c3an.wav b/src/data/colere/c3an.wav deleted file mode 100644 index cdb7dfae2514e330065534479d93c8fdcefc8f32..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c3an.wav and /dev/null differ diff --git a/src/data/colere/c3bc.wav 
b/src/data/colere/c3bc.wav deleted file mode 100644 index ae2ad5df7eeb3af635d79f98706d7f54e097aeca..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c3bc.wav and /dev/null differ diff --git a/src/data/colere/c3bf.wav b/src/data/colere/c3bf.wav deleted file mode 100644 index 2373fadc59c20c3b54461b2ab0c0e28572f9a895..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c3bf.wav and /dev/null differ diff --git a/src/data/colere/c3bj.wav b/src/data/colere/c3bj.wav deleted file mode 100644 index 2f69d7b93fb02052e0511ed1e169b4112a092f31..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c3bj.wav and /dev/null differ diff --git a/src/data/colere/c3bn.wav b/src/data/colere/c3bn.wav deleted file mode 100644 index d07cf0cd5705af273aa2f0e07f271c49c755caf7..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c3bn.wav and /dev/null differ diff --git a/src/data/colere/c4aaf.wav b/src/data/colere/c4aaf.wav deleted file mode 100644 index 5d907e2c92977e996324b5a53e112a3069356127..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c4aaf.wav and /dev/null differ diff --git a/src/data/colere/c4ac.wav b/src/data/colere/c4ac.wav deleted file mode 100644 index 5d3366b06e894f00a0c5e6e50dcbbfc8bbbfa68f..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c4ac.wav and /dev/null differ diff --git a/src/data/colere/c4af.wav b/src/data/colere/c4af.wav deleted file mode 100644 index 9773ee4395de583ae211f0ad2a128aaf08c0e1ac..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c4af.wav and /dev/null differ diff --git a/src/data/colere/c4aj.wav b/src/data/colere/c4aj.wav deleted file mode 100644 index 3d78610dcdfcb552a5294560aaef45c569a469ae..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c4aj.wav and /dev/null differ diff --git a/src/data/colere/c4an.wav b/src/data/colere/c4an.wav deleted file mode 100644 index ef8fe80cc9f42dc56a85e2c8a55d627f62538cbf..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c4an.wav and /dev/null differ diff --git a/src/data/colere/c4bc.wav b/src/data/colere/c4bc.wav deleted file mode 100644 index 68e71bffe723e4190a9f0b2036e047060a8abdf2..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c4bc.wav and /dev/null differ diff --git a/src/data/colere/c4bj.wav b/src/data/colere/c4bj.wav deleted file mode 100644 index 6d761876bba59855bf62305c427f2d2cf90afd8c..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c4bj.wav and /dev/null differ diff --git a/src/data/colere/c4bn.wav b/src/data/colere/c4bn.wav deleted file mode 100644 index 70a8946f12f8d81aac745600413fb2116ad7b9ef..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c4bn.wav and /dev/null differ diff --git a/src/data/colere/c5an.wav b/src/data/colere/c5an.wav deleted file mode 100644 index e2dad6a8fbfbe716a7f156dc80de5e0d0547730f..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c5an.wav and /dev/null differ diff --git a/src/data/colere/c5c.wav b/src/data/colere/c5c.wav deleted file mode 100644 index 30d81f2fb02d01b39e401cf8bf61a96693189d45..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c5c.wav and /dev/null differ diff --git a/src/data/colere/c5f.wav b/src/data/colere/c5f.wav deleted file mode 100644 index 2ff974fedca62f277e712c8e5ab81a73ffe7bb78..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c5f.wav and /dev/null 
differ diff --git a/src/data/colere/c5j.wav b/src/data/colere/c5j.wav deleted file mode 100644 index 56876114625a8ff283758447a31a4e39be1194be..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c5j.wav and /dev/null differ diff --git a/src/data/dataset.csv b/src/data/dataset.csv deleted file mode 100644 index 83b66cc25327ea20a3cac2d3d6e3a054cb305422..0000000000000000000000000000000000000000 --- a/src/data/dataset.csv +++ /dev/null @@ -1,113 +0,0 @@ -dossier,emotion -colere/c1ac.wav,colere -colere/c1af.wav,colere -colere/c1aj.wav,colere -colere/c1an.wav,colere -colere/c1bc.wav,colere -colere/c1bf.wav,colere -colere/c1bj.wav,colere -colere/c1bn.wav,colere -colere/c1cc.wav,colere -colere/c1cf.wav,colere -colere/c1cj.wav,colere -colere/c2ac.wav,colere -colere/c2af.wav,colere -colere/c2aj.wav,colere -colere/c2an.wav,colere -colere/c2bc.wav,colere -colere/c2bf.wav,colere -colere/c2bj.wav,colere -colere/c2bn.wav,colere -colere/c2cn.wav,colere -colere/c3ac.wav,colere -colere/c3af.wav,colere -colere/c3aj.wav,colere -colere/c3an.wav,colere -colere/c3bc.wav,colere -colere/c3bf.wav,colere -colere/c3bj.wav,colere -colere/c3bn.wav,colere -colere/c4aaf.wav,colere -colere/c4ac.wav,colere -colere/c4af.wav,colere -colere/c4aj.wav,colere -colere/c4an.wav,colere -colere/c4bc.wav,colere -colere/c4bj.wav,colere -colere/c4bn.wav,colere -colere/c5an.wav,colere -colere/c5c.wav,colere -colere/c5f.wav,colere -colere/c5j.wav,colere -neutre/n1ac.wav,neutre -neutre/n1af.wav,neutre -neutre/n1aj.wav,neutre -neutre/n1an.wav,neutre -neutre/n1bc.wav,neutre -neutre/n1bf.wav,neutre -neutre/n1bj.wav,neutre -neutre/n1bn.wav,neutre -neutre/n2ac.wav,neutre -neutre/n2af.wav,neutre -neutre/n2aj.wav,neutre -neutre/n2an.wav,neutre -neutre/n2bc.wav,neutre -neutre/n2bf.wav,neutre -neutre/n2bj.wav,neutre -neutre/n2bn.wav,neutre -neutre/n3ac.wav,neutre -neutre/n3af.wav,neutre -neutre/n3aj.wav,neutre -neutre/n3an.wav,neutre -neutre/n3bc.wav,neutre -neutre/n3bf.wav,neutre -neutre/n3bj.wav,neutre -neutre/n3bn.wav,neutre -neutre/n4ac.wav,neutre -neutre/n4aj.wav,neutre -neutre/n4an.wav,neutre -neutre/n4f.wav,neutre -neutre/n5ac.wav,neutre -neutre/n5af.wav,neutre -neutre/n5aj.wav,neutre -neutre/n5an.wav,neutre -neutre/n5bc.wav,neutre -neutre/n5bf.wav,neutre -neutre/n5bj.wav,neutre -neutre/n5bn.wav,neutre -joie/h1ac.wav,joie -joie/h1af.wav,joie -joie/h1aj.wav,joie -joie/h1an.wav,joie -joie/h1bc.wav,joie -joie/h1bf.wav,joie -joie/h1bj.wav,joie -joie/h1bn.wav,joie -joie/h21f.wav,joie -joie/h2ac.wav,joie -joie/h2aj.wav,joie -joie/h2an.wav,joie -joie/h2bc.wav,joie -joie/h2bf.wav,joie -joie/h2bj.wav,joie -joie/h2bn.wav,joie -joie/h3ac.wav,joie -joie/h3af.wav,joie -joie/h3aj.wav,joie -joie/h3anwav.wav,joie -joie/h3bc.wav,joie -joie/h3bf.wav,joie -joie/h3bj.wav,joie -joie/h3bn.wav,joie -joie/h4ac.wav,joie -joie/h4af.wav,joie -joie/h4aj.wav,joie -joie/h4an.wav,joie -joie/h4bc.wav,joie -joie/h4bf.wav,joie -joie/h4bj.wav,joie -joie/h4bn.wav,joie -joie/h5an.wav,joie -joie/h5c.wav,joie -joie/h5f.wav,joie -joie/h5j.wav,joie diff --git a/src/data/joie/.DS_Store b/src/data/joie/.DS_Store deleted file mode 100644 index 5008ddfcf53c02e82d7eee2e57c38e5672ef89f6..0000000000000000000000000000000000000000 Binary files a/src/data/joie/.DS_Store and /dev/null differ diff --git a/src/data/joie/h1ac.wav b/src/data/joie/h1ac.wav deleted file mode 100644 index 365b5e33c61769264184e3dc6500bd4c3bc97d27..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h1ac.wav and /dev/null differ diff --git a/src/data/joie/h1af.wav 
b/src/data/joie/h1af.wav deleted file mode 100644 index d1e83a0dc15c85292082dafffa7e42f82ac442ba..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h1af.wav and /dev/null differ diff --git a/src/data/joie/h1aj.wav b/src/data/joie/h1aj.wav deleted file mode 100644 index b5026f846cba16f5644f657bbd78cf0f53c48d47..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h1aj.wav and /dev/null differ diff --git a/src/data/joie/h1an.wav b/src/data/joie/h1an.wav deleted file mode 100644 index 9f5738a18afb016c678c054e87fd64d9e86d7683..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h1an.wav and /dev/null differ diff --git a/src/data/joie/h1bc.wav b/src/data/joie/h1bc.wav deleted file mode 100644 index bb046d0f5f400db7aa18d7bf58cec04e7fd6c894..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h1bc.wav and /dev/null differ diff --git a/src/data/joie/h1bf.wav b/src/data/joie/h1bf.wav deleted file mode 100644 index 315810b5fcbee4409ac682d86bb69b91d6c565bc..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h1bf.wav and /dev/null differ diff --git a/src/data/joie/h1bj.wav b/src/data/joie/h1bj.wav deleted file mode 100644 index 8e0fedd2096f1056fda0da1b8bb3914bf264a200..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h1bj.wav and /dev/null differ diff --git a/src/data/joie/h1bn.wav b/src/data/joie/h1bn.wav deleted file mode 100644 index 1ce54c92e6539c176ab10ef10ab4d006d2dfc0b2..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h1bn.wav and /dev/null differ diff --git a/src/data/joie/h21f.wav b/src/data/joie/h21f.wav deleted file mode 100644 index 5d7a790132fb070d93e575f811dac8edd1bd5427..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h21f.wav and /dev/null differ diff --git a/src/data/joie/h2ac.wav b/src/data/joie/h2ac.wav deleted file mode 100644 index 33791d25b7c5eea0d6961e814c6d4262f5027a95..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h2ac.wav and /dev/null differ diff --git a/src/data/joie/h2aj.wav b/src/data/joie/h2aj.wav deleted file mode 100644 index e90c82f5049ad1e06422cad974f3f21394cab115..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h2aj.wav and /dev/null differ diff --git a/src/data/joie/h2an.wav b/src/data/joie/h2an.wav deleted file mode 100644 index d648fcab9d2d0d8a19d531001531242de3a7cfcb..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h2an.wav and /dev/null differ diff --git a/src/data/joie/h2bc.wav b/src/data/joie/h2bc.wav deleted file mode 100644 index 07bb8b097d742140b24e511d062f0fddff8c0a3b..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h2bc.wav and /dev/null differ diff --git a/src/data/joie/h2bf.wav b/src/data/joie/h2bf.wav deleted file mode 100644 index 9031d5e7b4272cd28394f90a3ad026c74786ee9b..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h2bf.wav and /dev/null differ diff --git a/src/data/joie/h2bj.wav b/src/data/joie/h2bj.wav deleted file mode 100644 index d83e34bace9a886c1313ee31b108fe789f444b27..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h2bj.wav and /dev/null differ diff --git a/src/data/joie/h2bn.wav b/src/data/joie/h2bn.wav deleted file mode 100644 index acdb5f6afd5460a0e4b7ab355b5516a2eba5ad3b..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h2bn.wav and /dev/null differ diff --git a/src/data/joie/h3ac.wav b/src/data/joie/h3ac.wav deleted file mode 
100644 index 5e264c1a29951abb40c7d0200cbe7f54e22defc6..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h3ac.wav and /dev/null differ diff --git a/src/data/joie/h3af.wav b/src/data/joie/h3af.wav deleted file mode 100644 index c761bf682554c77809e4639fbc1229a431d22a6b..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h3af.wav and /dev/null differ diff --git a/src/data/joie/h3aj.wav b/src/data/joie/h3aj.wav deleted file mode 100644 index 72cb0ab63a2ce21685e5b87a8cb634d58fe77478..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h3aj.wav and /dev/null differ diff --git a/src/data/joie/h3anwav.wav b/src/data/joie/h3anwav.wav deleted file mode 100644 index c7b60bb34eb44c992900e936ae2baa50555bb9fa..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h3anwav.wav and /dev/null differ diff --git a/src/data/joie/h3bc.wav b/src/data/joie/h3bc.wav deleted file mode 100644 index c0a014783eb8ffe2b12eb4bfb8efa7138df888b2..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h3bc.wav and /dev/null differ diff --git a/src/data/joie/h3bf.wav b/src/data/joie/h3bf.wav deleted file mode 100644 index c3e14c3c82b0a1a00d4d4770bc9ec0b4df2469a7..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h3bf.wav and /dev/null differ diff --git a/src/data/joie/h3bj.wav b/src/data/joie/h3bj.wav deleted file mode 100644 index 656b23179c5d8af1cf57d609e2a0e338080f5073..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h3bj.wav and /dev/null differ diff --git a/src/data/joie/h3bn.wav b/src/data/joie/h3bn.wav deleted file mode 100644 index a9d47024320f98f0115883b1d0f1921e47bf87a1..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h3bn.wav and /dev/null differ diff --git a/src/data/joie/h4ac.wav b/src/data/joie/h4ac.wav deleted file mode 100644 index ccdee2f3801542e054af868e734e4d5872963fe9..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h4ac.wav and /dev/null differ diff --git a/src/data/joie/h4af.wav b/src/data/joie/h4af.wav deleted file mode 100644 index 6498be3632495172f0507994b062b61308f73833..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h4af.wav and /dev/null differ diff --git a/src/data/joie/h4aj.wav b/src/data/joie/h4aj.wav deleted file mode 100644 index 654cd2ec6b3a64dd6cedf0aa4b8a808b387be550..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h4aj.wav and /dev/null differ diff --git a/src/data/joie/h4an.wav b/src/data/joie/h4an.wav deleted file mode 100644 index 3d5b734baf2c6b9638fbf2f28fe31be099ed980c..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h4an.wav and /dev/null differ diff --git a/src/data/joie/h4bc.wav b/src/data/joie/h4bc.wav deleted file mode 100644 index 2cb72dad7485a7f0503490a28ff69d20414d4de1..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h4bc.wav and /dev/null differ diff --git a/src/data/joie/h4bf.wav b/src/data/joie/h4bf.wav deleted file mode 100644 index b3a3cf9bc1453b4b712491e995f8cda8acb654d0..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h4bf.wav and /dev/null differ diff --git a/src/data/joie/h4bj.wav b/src/data/joie/h4bj.wav deleted file mode 100644 index 3bf1943162af54a1765e5ca923cf86ad6a7aac60..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h4bj.wav and /dev/null differ diff --git a/src/data/joie/h4bn.wav b/src/data/joie/h4bn.wav deleted file mode 100644 index 
fa0da93f23242b1276b1a4fe1bcd16f7d240750b..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h4bn.wav and /dev/null differ diff --git a/src/data/joie/h5an.wav b/src/data/joie/h5an.wav deleted file mode 100644 index 9a1b1ae25e0b597b1dcb1c798a2b1a04758ad31d..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h5an.wav and /dev/null differ diff --git a/src/data/joie/h5c.wav b/src/data/joie/h5c.wav deleted file mode 100644 index 57c8e9ac7c6b312fa7de327493946647e4d48f42..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h5c.wav and /dev/null differ diff --git a/src/data/joie/h5f.wav b/src/data/joie/h5f.wav deleted file mode 100644 index 6591f721de5d8529e7d7e3ebd914ef18a04a1f29..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h5f.wav and /dev/null differ diff --git a/src/data/joie/h5j.wav b/src/data/joie/h5j.wav deleted file mode 100644 index 438254f915751beeea4991fa989f26548faec30f..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h5j.wav and /dev/null differ diff --git a/src/data/neutre/n1ac.wav b/src/data/neutre/n1ac.wav deleted file mode 100644 index df8888182b1e7d26b7208736c00a4e4c827b2ed9..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n1ac.wav and /dev/null differ diff --git a/src/data/neutre/n1af.wav b/src/data/neutre/n1af.wav deleted file mode 100644 index f67dc5106ff4a363629e9e5759ea5a5117d0662d..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n1af.wav and /dev/null differ diff --git a/src/data/neutre/n1aj.wav b/src/data/neutre/n1aj.wav deleted file mode 100644 index e178b9938741c53c5e6298ad92425d63d11983d3..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n1aj.wav and /dev/null differ diff --git a/src/data/neutre/n1an.wav b/src/data/neutre/n1an.wav deleted file mode 100644 index 01585a6924876a4a6c18c80d1aaa02d27f0d2c3f..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n1an.wav and /dev/null differ diff --git a/src/data/neutre/n1bc.wav b/src/data/neutre/n1bc.wav deleted file mode 100644 index bb910de068a5ff76ac37010df55e6d8691079585..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n1bc.wav and /dev/null differ diff --git a/src/data/neutre/n1bf.wav b/src/data/neutre/n1bf.wav deleted file mode 100644 index be4c834d19f22fe3d297639db4343bc411c2bbac..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n1bf.wav and /dev/null differ diff --git a/src/data/neutre/n1bj.wav b/src/data/neutre/n1bj.wav deleted file mode 100644 index 8ede3179d45f938e574ac1dca63c84f4165220a4..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n1bj.wav and /dev/null differ diff --git a/src/data/neutre/n1bn.wav b/src/data/neutre/n1bn.wav deleted file mode 100644 index 06df31510bc66528c77b294f52c97f268ce87e23..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n1bn.wav and /dev/null differ diff --git a/src/data/neutre/n2ac.wav b/src/data/neutre/n2ac.wav deleted file mode 100644 index feeaeebe063373ec1b64a325ce0717f5758339f6..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n2ac.wav and /dev/null differ diff --git a/src/data/neutre/n2af.wav b/src/data/neutre/n2af.wav deleted file mode 100644 index 55a54bd3e58e65a57b44e56baca1e287f362ecc6..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n2af.wav and /dev/null differ diff --git a/src/data/neutre/n2aj.wav b/src/data/neutre/n2aj.wav deleted file mode 
100644 index 59f93bdbf838fba19ba0e7d0816b598b81bd4184..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n2aj.wav and /dev/null differ diff --git a/src/data/neutre/n2an.wav b/src/data/neutre/n2an.wav deleted file mode 100644 index 4d93413421a30eb760a8dfabb532230aecaee3c6..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n2an.wav and /dev/null differ diff --git a/src/data/neutre/n2bc.wav b/src/data/neutre/n2bc.wav deleted file mode 100644 index 6f7f9575cda53160b02d5a69f5cc220ab9bfb96a..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n2bc.wav and /dev/null differ diff --git a/src/data/neutre/n2bf.wav b/src/data/neutre/n2bf.wav deleted file mode 100644 index 741ac264c281f98c69c45618b353606708b5b8b4..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n2bf.wav and /dev/null differ diff --git a/src/data/neutre/n2bj.wav b/src/data/neutre/n2bj.wav deleted file mode 100644 index 89c347dee5c5eb81591d2f6020beabd9af187042..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n2bj.wav and /dev/null differ diff --git a/src/data/neutre/n2bn.wav b/src/data/neutre/n2bn.wav deleted file mode 100644 index aa4ed5ac0d68bff9e35974749f1ae32af974146c..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n2bn.wav and /dev/null differ diff --git a/src/data/neutre/n3ac.wav b/src/data/neutre/n3ac.wav deleted file mode 100644 index d7820cff9112b655ff20a0af7c0f698ffed13473..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n3ac.wav and /dev/null differ diff --git a/src/data/neutre/n3af.wav b/src/data/neutre/n3af.wav deleted file mode 100644 index 68f96a78afbe6e489cd9890e691a365fb6a48b45..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n3af.wav and /dev/null differ diff --git a/src/data/neutre/n3aj.wav b/src/data/neutre/n3aj.wav deleted file mode 100644 index f773b81d7817332c75d92b5ef3de238444dbf528..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n3aj.wav and /dev/null differ diff --git a/src/data/neutre/n3an.wav b/src/data/neutre/n3an.wav deleted file mode 100644 index 7f878937d9bdb690ce0d7744608a2ddaa3030f58..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n3an.wav and /dev/null differ diff --git a/src/data/neutre/n3bc.wav b/src/data/neutre/n3bc.wav deleted file mode 100644 index 805840dc276cc7dd3e586b427618ce52dadb5961..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n3bc.wav and /dev/null differ diff --git a/src/data/neutre/n3bf.wav b/src/data/neutre/n3bf.wav deleted file mode 100644 index 25a9b796b52624bf40f48db62d8af1e075abd413..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n3bf.wav and /dev/null differ diff --git a/src/data/neutre/n3bj.wav b/src/data/neutre/n3bj.wav deleted file mode 100644 index 100191833e4b7b6ac0d873f5392726e68cb1df80..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n3bj.wav and /dev/null differ diff --git a/src/data/neutre/n3bn.wav b/src/data/neutre/n3bn.wav deleted file mode 100644 index a8b8747acf885991076db4d1e62a431397021b8c..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n3bn.wav and /dev/null differ diff --git a/src/data/neutre/n4ac.wav b/src/data/neutre/n4ac.wav deleted file mode 100644 index aa52669b5afcd60c00cc5850e322a8a8f9011d3c..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n4ac.wav and /dev/null differ diff --git a/src/data/neutre/n4aj.wav 
b/src/data/neutre/n4aj.wav deleted file mode 100644 index 50675d1f6e73f1bff0c768c554549491bd69b08f..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n4aj.wav and /dev/null differ diff --git a/src/data/neutre/n4an.wav b/src/data/neutre/n4an.wav deleted file mode 100644 index e475e8e5bb0d12eb41cc0464e48f8172db58409b..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n4an.wav and /dev/null differ diff --git a/src/data/neutre/n4f.wav b/src/data/neutre/n4f.wav deleted file mode 100644 index f04549721cb9271f4a2376c65589f2fdd99e0118..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n4f.wav and /dev/null differ diff --git a/src/data/neutre/n5ac.wav b/src/data/neutre/n5ac.wav deleted file mode 100644 index a525f07fc015351d0bcf47265179c83f4e827784..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n5ac.wav and /dev/null differ diff --git a/src/data/neutre/n5af.wav b/src/data/neutre/n5af.wav deleted file mode 100644 index 232357862a49cb8a0c85fa1d2f9f14562c97d9d0..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n5af.wav and /dev/null differ diff --git a/src/data/neutre/n5aj.wav b/src/data/neutre/n5aj.wav deleted file mode 100644 index 68f9c0e8a52504e82c7c5f1bee45676f77076fdd..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n5aj.wav and /dev/null differ diff --git a/src/data/neutre/n5an.wav b/src/data/neutre/n5an.wav deleted file mode 100644 index 6f6b32117e4dc5405925ad7ea23c4f97dacca673..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n5an.wav and /dev/null differ diff --git a/src/data/neutre/n5bc.wav b/src/data/neutre/n5bc.wav deleted file mode 100644 index da2385a2cce740655e6b23aec563fe4ca0793945..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n5bc.wav and /dev/null differ diff --git a/src/data/neutre/n5bf.wav b/src/data/neutre/n5bf.wav deleted file mode 100644 index f9c23dbfa6511a770362b5b0330512dafe38096e..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n5bf.wav and /dev/null differ diff --git a/src/data/neutre/n5bj.wav b/src/data/neutre/n5bj.wav deleted file mode 100644 index d999c3d54d8b17347fa81d74491e2ac64e672209..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n5bj.wav and /dev/null differ diff --git a/src/data/neutre/n5bn.wav b/src/data/neutre/n5bn.wav deleted file mode 100644 index 039b3a3b1ae7e27418e74ea0959da94aab27c3a5..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n5bn.wav and /dev/null differ diff --git a/src/data/processing.ipynb b/src/data/processing.ipynb deleted file mode 100644 index 787b4d5979bf4ebb980d47d3874e9d3dedcf1849..0000000000000000000000000000000000000000 --- a/src/data/processing.ipynb +++ /dev/null @@ -1,113 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 112 entries, 0 to 111\n", - "Data columns (total 2 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 dossier 112 non-null object\n", - " 1 emotion 112 non-null object\n", - "dtypes: object(2)\n", - "memory usage: 1.9+ KB\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "\n", - "data = pd.read_csv('dataset.csv', sep=',', header=0)\n", - "\n", - "data.info()\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - 
{ - "name": "stdout", - "output_type": "stream", - "text": [ - " dossier emotion\n", - "0 colere/c1ac.wav colere\n", - "1 colere/c1af.wav colere\n", - "2 colere/c1aj.wav colere\n", - "3 colere/c1an.wav colere\n", - "4 colere/c1bc.wav colere\n", - ".. ... ...\n", - "107 joie/h4bn.wav joie\n", - "108 joie/h5an.wav joie\n", - "109 joie/h5c.wav joie\n", - "110 joie/h5f.wav joie\n", - "111 joie/h5j.wav joie\n", - "\n", - "[112 rows x 2 columns]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Evidya\\AppData\\Local\\Temp\\ipykernel_24704\\3726049179.py:8: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n", - " data = data.applymap(lambda x: x.strip() if isinstance(x, str) else x)\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "\n", - "\n", - "# 🔹 1. Supprimer les espaces des noms de colonnes\n", - "data.columns = data.columns.str.strip()\n", - "\n", - "# 🔹 2. Supprimer les espaces dans toutes les cellules (colonnes object)\n", - "data = data.applymap(lambda x: x.strip() if isinstance(x, str) else x)\n", - "\n", - "# Afficher le DataFrame corrigé\n", - "print(data)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# save to csv\n", - "data.to_csv('dataset.csv', index=False, sep=',')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/src/model/emotion_classifier.py b/src/model/emotion_classifier.py deleted file mode 100644 index 312e8de7dd6c6cfc69a7e979ccbf05658d4d19b6..0000000000000000000000000000000000000000 --- a/src/model/emotion_classifier.py +++ /dev/null @@ -1,17 +0,0 @@ -import torch -import torch.nn as nn -from transformers import Wav2Vec2Model - -class Wav2Vec2EmotionClassifier(nn.Module): - - def __init__(self, model_name="facebook/wav2vec2-large-xlsr-53-french", num_labels=3): - super(Wav2Vec2EmotionClassifier, self).__init__() - self.wav2vec2 = Wav2Vec2Model.from_pretrained(model_name) - self.fc = nn.Linear(self.wav2vec2.config.hidden_size, num_labels) - self.softmax = nn.Softmax(dim=1) - - def forward(self, input_values): - outputs = self.wav2vec2(input_values).last_hidden_state - pooled_output = torch.mean(outputs, dim=1) - logits = self.fc(pooled_output) - return self.softmax(logits) diff --git a/src/model/emotion_dataset.py b/src/model/emotion_dataset.py deleted file mode 100644 index b152a6bbe88d2e050635db0ecf4a36799bfb65f2..0000000000000000000000000000000000000000 --- a/src/model/emotion_dataset.py +++ /dev/null @@ -1,29 +0,0 @@ -import librosa -import torch -import pandas as pd -from torch.utils.data import Dataset -import os - -class EmotionDataset(Dataset): - def __init__(self, csv_file, processor): - self.data = pd.read_csv(csv_file, sep=",", header=0) - # print(self.data.info()) # Pour voir les premières lignes du dataset - self.processor = processor - self.emotion_labels = {"joie": 0, "colere": 1, "neutre": 2} - # print(self.data["emotion"].unique()) # Pour voir les valeurs exactes - - - def __len__(self): - return len(self.data) - - def __getitem__(self, idx): - base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", 
"data")) - audio_file = self.data.iloc[idx, 0] - label = self.emotion_labels[self.data.iloc[idx, 1].strip()] - - audio_path = os.path.join(base_path, audio_file) - waveform, _ = librosa.load(audio_path, sr=16000) # Chargement audio - input_values = self.processor(waveform, return_tensors="pt", sampling_rate=16000).input_values - - return input_values.squeeze(0), torch.tensor(label, dtype=torch.long) - diff --git a/src/model/predict.py b/src/model/predict.py deleted file mode 100644 index ad67ab365fd29b192d51d6eb83f4508a859c9544..0000000000000000000000000000000000000000 --- a/src/model/predict.py +++ /dev/null @@ -1,30 +0,0 @@ -import torch -from transformers import Wav2Vec2Processor -from model import Wav2Vec2EmotionClassifier -import librosa - -# Charger le modèle et le processeur -device = torch.device("cuda" if torch.cuda.is_available() else "cpu") -processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-xlsr-53") -model = Wav2Vec2EmotionClassifier() -model.load_state_dict(torch.load("wav2vec2_emotion.pth")) -model.to(device) -model.eval() - -emotion_labels = ["joie", "colère", "neutre"] - -def predict_emotion(audio_path): - waveform, _ = librosa.load(audio_path, sr=16000) - input_values = processor(waveform, return_tensors="pt", sampling_rate=16000).input_values - input_values = input_values.to(device) - - with torch.no_grad(): - outputs = model(input_values) - - predicted_label = torch.argmax(outputs, dim=1).item() - return emotion_labels[predicted_label] - -# Exemple d'utilisation -audio_test = "data/n1ac.wav" -emotion = predict_emotion(audio_test) -print(f"Émotion détectée : {emotion}") diff --git a/src/model/test_wav2vec.py b/src/model/test_wav2vec.py deleted file mode 100644 index 207bc829d6058a375060557fbd07aec8f3a2d0ba..0000000000000000000000000000000000000000 --- a/src/model/test_wav2vec.py +++ /dev/null @@ -1,62 +0,0 @@ -from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC -import torch -import librosa -import numpy as np -import matplotlib.pyplot as plt - -# Charger le modèle et le processeur Wav2Vec 2.0 -model_name = "facebook/wav2vec2-large-xlsr-53-french" -processor = Wav2Vec2Processor.from_pretrained(model_name) -model = Wav2Vec2ForCTC.from_pretrained(model_name) - -# Charger l'audio -audio_file = "C:\\Users\\fkpamegan\\Downloads\\datasets_oreau2_m_sessp_07a01Pa.wav" -y, sr = librosa.load(audio_file, sr=16000) - -# Prétraiter l'audio avec le processeur Wav2Vec 2.0 -input_values = processor(y, return_tensors="pt").input_values - -# Obtenir la prédiction (logits) -with torch.no_grad(): - logits = model(input_values).logitsa - -# Obtenir les IDs des tokens prédits (transcription) -predicted_ids = torch.argmax(logits, dim=-1) - -# Décoder les IDs pour obtenir le texte transcrit -transcription = processor.decode(predicted_ids[0]) - -print("Transcription:", transcription) - - -# Extraire le pitch (hauteur tonale) et l'intensité -pitch, magnitudes = librosa.core.piptrack(y=y, sr=sr) -intensity = librosa.feature.rms(y=y) # Intensité (volume) - -# Calculer le tempo (vitesse de parole) -tempo, _ = librosa.beat.beat_track(y=y, sr=sr) - -# Affichage du pitch -plt.figure(figsize=(10, 6)) -librosa.display.specshow(pitch, x_axis='time', y_axis='log') -plt.colorbar() -plt.title("Pitch (Hauteur Tonale)") -plt.show() - -# Affichage de l'intensité -plt.figure(figsize=(10, 6)) -librosa.display.specshow(intensity, x_axis='time') -plt.colorbar() -plt.title("Intensité") -plt.show() - -# Fusionner la transcription avec les caractéristiques prosodiques (pitch, intensité, 
diff --git a/src/model/test_wav2vec.py b/src/model/test_wav2vec.py
deleted file mode 100644
index 207bc829d6058a375060557fbd07aec8f3a2d0ba..0000000000000000000000000000000000000000
--- a/src/model/test_wav2vec.py
+++ /dev/null
@@ -1,62 +0,0 @@
-from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
-import torch
-import librosa
-import librosa.display
-import numpy as np
-import matplotlib.pyplot as plt
-
-# Load the Wav2Vec 2.0 model and processor
-model_name = "facebook/wav2vec2-large-xlsr-53-french"
-processor = Wav2Vec2Processor.from_pretrained(model_name)
-model = Wav2Vec2ForCTC.from_pretrained(model_name)
-
-# Load the audio
-audio_file = "C:\\Users\\fkpamegan\\Downloads\\datasets_oreau2_m_sessp_07a01Pa.wav"
-y, sr = librosa.load(audio_file, sr=16000)
-
-# Preprocess the audio with the Wav2Vec 2.0 processor
-input_values = processor(y, return_tensors="pt").input_values
-
-# Get the prediction (logits)
-with torch.no_grad():
-    logits = model(input_values).logits
-
-# Get the predicted token IDs (transcription)
-predicted_ids = torch.argmax(logits, dim=-1)
-
-# Decode the IDs into the transcribed text
-transcription = processor.decode(predicted_ids[0])
-
-print("Transcription:", transcription)
-
-
-# Extract the pitch and the intensity
-pitch, magnitudes = librosa.core.piptrack(y=y, sr=sr)
-intensity = librosa.feature.rms(y=y)  # Intensity (volume)
-
-# Compute the tempo (speech rate)
-tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
-
-# Plot the pitch
-plt.figure(figsize=(10, 6))
-librosa.display.specshow(pitch, x_axis='time', y_axis='log')
-plt.colorbar()
-plt.title("Pitch")
-plt.show()
-
-# Plot the intensity
-plt.figure(figsize=(10, 6))
-librosa.display.specshow(intensity, x_axis='time')
-plt.colorbar()
-plt.title("Intensity")
-plt.show()
-
-# Merge the transcription with the prosodic features (pitch, intensity, tempo)
-features = np.hstack([
-    np.mean(intensity, axis=1),  # Mean intensity
-    np.mean(pitch, axis=1),      # Mean pitch per frequency bin
-    tempo                        # Tempo
-])
-
-# Print the extracted features
-print("Combined features:")
-print(features)
diff --git a/src/model/train.py b/src/model/train.py
deleted file mode 100644
index c1a0c7ae87432c4849ec15e98222c9096319f77f..0000000000000000000000000000000000000000
--- a/src/model/train.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import torch
-import torch.optim as optim
-import torch.nn as nn
-from torch.utils.data import DataLoader
-from transformers import Wav2Vec2Processor
-from emotion_dataset import EmotionDataset
-from emotion_classifier import Wav2Vec2EmotionClassifier
-import os
-from utils import collate_fn
-
-
-# Load the processor and the dataset
-processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-xlsr-53-french")
-data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data", "dataset.csv"))
-if not os.path.exists(data_path):
-    raise FileNotFoundError(f"File {data_path} not found.")
-
-dataset = EmotionDataset(data_path, processor)
-dataloader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)  # collate_fn added
-
-
-# Initialise the model
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model = Wav2Vec2EmotionClassifier().to(device)
-
-# Define the loss function and the optimiser
-criterion = nn.CrossEntropyLoss()
-optimizer = optim.AdamW(model.parameters(), lr=5e-5)
-
-# Train the model
-num_epochs = 10
-for epoch in range(num_epochs):
-    model.train()
-    total_loss = 0
-
-    for inputs, labels in dataloader:
-        inputs, labels = inputs.to(device), labels.to(device)
-
-        optimizer.zero_grad()
-        outputs = model(inputs)
-        loss = criterion(outputs, labels)
-        loss.backward()
-        optimizer.step()
-
-        total_loss += loss.item()
-
-    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")
-
-# Save the model
-torch.save(model.state_dict(), "wav2vec2_emotion.pth")
-print("Model saved!")
diff --git a/src/model/utils.py b/src/model/utils.py
deleted file mode 100644
index d5f1a5b6ec3376c8ecb16c2e1252161d709a0452..0000000000000000000000000000000000000000
--- a/src/model/utils.py
+++ /dev/null
@@ -1,8 +0,0 @@
-import torch
-from torch.nn.utils.rnn import pad_sequence
-
-def collate_fn(batch):
-    inputs, labels = zip(*batch)  # Split the features and the labels
-    inputs = pad_sequence(inputs, batch_first=True, padding_value=0)  # Pad the audio sequences
-    labels = torch.tensor(labels, dtype=torch.long)  # Convert to a tensor
-    return inputs, labels
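In the deleted test script above, np.mean(pitch, axis=1) averages over time but keeps one value per frequency bin, so the "combined features" vector is dominated by hundreds of near-zero pitch entries on a different scale from intensity and tempo. A sketch of a compact per-file prosodic summary using scalar statistics only (the magnitude threshold for voiced frames is an assumption, not from the deleted code):

import numpy as np
import librosa

def prosodic_summary(path):
    y, sr = librosa.load(path, sr=16000)
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    voiced = pitches[magnitudes > np.median(magnitudes)]  # keep confident pitch estimates
    rms = librosa.feature.rms(y=y)
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    return np.array([
        float(voiced.mean()) if voiced.size else 0.0,  # mean pitch (Hz)
        float(rms.mean()),                             # mean intensity
        float(np.atleast_1d(tempo)[0]),                # tempo (BPM)
    ], dtype=np.float32)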
diff --git a/test_speech.py b/test_speech.py
new file mode 100644
index 0000000000000000000000000000000000000000..b4fc4d9f0f2b79e64627eb7a0fa2beeb6f466bd5
--- /dev/null
+++ b/test_speech.py
@@ -0,0 +1,49 @@
+import torch
+import torchaudio
+from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
+import os
+
+# 🔹 Parameters
+MODEL_NAME = "./wav2vec2_emotion"  # Path to the saved model
+LABELS = ["colere", "joie", "neutre"]  # The classes
+
+# 🔹 Load the processor and the model
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
+model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_NAME).to(device)
+model.eval()  # Evaluation mode
+
+
+def predict_emotion(audio_path):
+    # Load the audio
+    waveform, sample_rate = torchaudio.load(audio_path)
+
+    # Preprocess the audio
+    inputs = processor(
+        waveform.squeeze().numpy(),
+        sampling_rate=sample_rate,
+        return_tensors="pt",
+        padding=True,
+        truncation=True,
+        max_length=32000  # Adjust to the duration of your files
+    )
+
+    # Move the data to the right device (CPU or GPU)
+    input_values = inputs["input_values"].to(device)
+
+    # Prediction
+    with torch.no_grad():
+        logits = model(input_values).logits
+
+    # Find the predicted emotion
+    predicted_class = torch.argmax(logits, dim=-1).item()
+
+    return LABELS[predicted_class]  # Return the matching label
+
+base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
+audio_file = os.path.join(base_path, "colere", "c1ac.wav")
+predicted_emotion = predict_emotion(audio_file)
+print(f"🎙️ Predicted emotion: {predicted_emotion}")
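A quick way to exercise test_speech.py's predict_emotion beyond a single file is to sweep every class folder and count hits. A minimal sketch reusing LABELS, base_path and predict_emotion as defined above, and assuming the data/<label>/*.wav layout used throughout this repo:

import os

correct = total = 0
for label in LABELS:
    folder = os.path.join(base_path, label)
    for name in os.listdir(folder):
        if name.endswith(".wav"):
            total += 1
            correct += predict_emotion(os.path.join(folder, name)) == label
print(f"Accuracy: {correct}/{total}")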
diff --git a/train.py b/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..662c2611938a998e1eefdf73d521483c154d5b9d
--- /dev/null
+++ b/train.py
@@ -0,0 +1,80 @@
+import torch
+import torch.optim as optim
+import torch.nn as nn
+from torch.utils.data import DataLoader
+from sklearn.metrics import accuracy_score
+from utils.dataset import load_audio_data
+from utils.preprocessing import preprocess_audio, prepare_features, collate_fn
+from model.emotion_classifier import EmotionClassifier
+from config import DEVICE, NUM_LABELS, BEST_MODEL_NAME
+import os
+
+# Load the data and split it into train / test
+data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "dataset"))
+ds = load_audio_data(data_dir)
+
+# Preprocessing
+ds["train"] = ds["train"].map(preprocess_audio).map(lambda batch: prepare_features(batch, max_length=128))
+ds["test"] = ds["test"].map(preprocess_audio).map(lambda batch: prepare_features(batch, max_length=128))
+
+# DataLoader
+train_loader = DataLoader(ds["train"], batch_size=8, shuffle=True, collate_fn=collate_fn)
+test_loader = DataLoader(ds["test"], batch_size=8, shuffle=False, collate_fn=collate_fn)
+
+# Instantiate the model
+classifier = EmotionClassifier(feature_dim=40, num_labels=NUM_LABELS).to(DEVICE)
+
+# Training function
+def train_classifier(classifier, train_loader, test_loader, epochs=20):
+    optimizer = optim.AdamW(classifier.parameters(), lr=2e-5, weight_decay=0.01)
+    loss_fn = nn.CrossEntropyLoss()
+    best_accuracy = 0.0
+
+    for epoch in range(epochs):
+        classifier.train()
+        total_loss, correct = 0, 0
+
+        for inputs, labels in train_loader:
+            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
+            optimizer.zero_grad()
+
+            logits = classifier(inputs)
+            loss = loss_fn(logits, labels)
+
+            loss.backward()
+            optimizer.step()
+
+            total_loss += loss.item()
+            correct += (logits.argmax(dim=-1) == labels).sum().item()
+
+        train_acc = correct / len(train_loader.dataset)
+
+        if train_acc > best_accuracy:
+            best_accuracy = train_acc
+            torch.save(classifier.state_dict(), BEST_MODEL_NAME)
+            print(f"✔️ New best model saved! Accuracy: {best_accuracy:.4f}")
+
+        print(f"📢 Epoch {epoch+1}/{epochs} - Loss: {total_loss:.4f} - Accuracy: {train_acc:.4f}")
+
+    return classifier
+
+# Evaluate the model
+def evaluate(model, test_loader):
+    model.eval()
+    all_preds, all_labels = [], []
+
+    with torch.no_grad():
+        for inputs, labels in test_loader:
+            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
+
+            logits = model(inputs)
+            preds = torch.argmax(logits, dim=-1).cpu().numpy()
+            all_preds.extend(preds)
+            all_labels.extend(labels.cpu().numpy())
+
+    return accuracy_score(all_labels, all_preds)
+
+# Run the training
+trained_classifier = train_classifier(classifier, train_loader, test_loader, epochs=20)
+
+print("✅ Training finished, the best model has been saved!")
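Note that train.py defines evaluate() and builds test_loader but never calls either, so checkpoint selection rests on training accuracy alone, which can reward overfitting. A short follow-up that reports held-out accuracy (a suggested addition, not part of the committed script):

test_acc = evaluate(trained_classifier, test_loader)
print(f"Test accuracy: {test_acc:.4f}")

Selecting the saved checkpoint on this held-out score instead of train_acc would be the usual refinement.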
diff --git a/train_speech.py b/train_speech.py
new file mode 100644
index 0000000000000000000000000000000000000000..56a304fd72673b58f2ca5434aa2016e2fb9f81b0
--- /dev/null
+++ b/train_speech.py
@@ -0,0 +1,88 @@
+import torch
+import torchaudio
+import os
+from datasets import Dataset, DatasetDict
+from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification, TrainingArguments, Trainer
+
+# 🔹 Parameters
+MODEL_NAME = "facebook/wav2vec2-large-xlsr-53-french"
+NUM_LABELS = 3  # Number of emotion classes
+BATCH_SIZE = 8
+EPOCHS = 10
+LEARNING_RATE = 1e-4
+MAX_LENGTH = 32000  # Adjust to the duration of your audio files
+
+# 🔹 Check for an available GPU
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# 🔹 Load the processor and the model
+processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
+model = Wav2Vec2ForSequenceClassification.from_pretrained(
+    MODEL_NAME,
+    num_labels=NUM_LABELS,
+    problem_type="single_label_classification"
+).to(device)
+
+# 🔹 Load the audio files without a CSV
+def load_audio_data(data_dir):
+    data = {"file_path": [], "label": []}
+    labels = ["colere", "joie", "neutre"]  # Adjust to your classes
+
+    for label in labels:
+        folder_path = os.path.join(data_dir, label)
+        for file in os.listdir(folder_path):
+            if file.endswith(".wav"):
+                data["file_path"].append(os.path.join(folder_path, file))
+                data["label"].append(labels.index(label))
+
+    dataset = Dataset.from_dict(data)
+    train_test_split = dataset.train_test_split(test_size=0.2)  # 80% train, 20% test
+    return DatasetDict({"train": train_test_split["train"], "test": train_test_split["test"]})
+
+# 🔹 Audio preprocessing
+def preprocess_audio(file_path):
+    waveform, sample_rate = torchaudio.load(file_path)
+    inputs = processor(
+        waveform.squeeze().numpy(),
+        sampling_rate=sample_rate,
+        return_tensors="pt",
+        padding=True,
+        truncation=True,
+        max_length=MAX_LENGTH  # ✅ fixes the earlier truncation error
+    )
+    return inputs["input_values"][0]  # Return the preprocessed audio values
+
+# 🔹 Load and preprocess the dataset
+data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
+ds = load_audio_data(data_dir)
+
+def preprocess_batch(batch):
+    batch["input_values"] = preprocess_audio(batch["file_path"])
+    return batch
+
+ds = ds.map(preprocess_batch, remove_columns=["file_path"])
+
+# 🔹 Define the training arguments
+training_args = TrainingArguments(
+    output_dir="./wav2vec2_emotion",
+    evaluation_strategy="epoch",
+    save_strategy="epoch",
+    learning_rate=LEARNING_RATE,
+    per_device_train_batch_size=BATCH_SIZE,
+    per_device_eval_batch_size=BATCH_SIZE,
+    num_train_epochs=EPOCHS,
+    save_total_limit=2,
+    logging_dir="./logs",
+    logging_steps=10,
+)
+
+# 🔹 Define the trainer
+trainer = Trainer(
+    model=model,
+    args=training_args,
+    train_dataset=ds["train"],
+    eval_dataset=ds["test"],
+)
+
+# 🚀 Start training
+trainer.train()
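One thing train_speech.py's preprocess_audio does not do is resample: it forwards each file's native rate to the processor, while the XLSR checkpoint expects 16 kHz audio (utils/preprocessing.py below does resample). A guard that could be dropped into preprocess_audio, sketched with torchaudio; the helper name and constant are hypothetical, and this is an assumption about intent rather than part of the commit:

import torchaudio

TARGET_SR = 16000

def ensure_16k(waveform, sample_rate):
    # Resample only when the file's native rate differs from the model's.
    if sample_rate != TARGET_SR:
        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=TARGET_SR)
        waveform = resampler(waveform)
    return waveform, TARGET_SR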
diff --git a/utils.py b/utils.py
deleted file mode 100644
index 2e2ae4592696a08387bdf15df0a5583e669047ee..0000000000000000000000000000000000000000
--- a/utils.py
+++ /dev/null
@@ -1,4 +0,0 @@
-import streamlit as st
-import datetime
-
-
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc
--- /dev/null
+++ b/utils/__init__.py
@@ -0,0 +1 @@
+
diff --git a/utils/dataset.py b/utils/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..843b68828f4a772b0d227115dd98cf32cda90e59
--- /dev/null
+++ b/utils/dataset.py
@@ -0,0 +1,44 @@
+import os
+from datasets import Dataset
+from config import LABELS
+import pandas as pd
+
+def load_audio_data(data_dir):
+    data = []
+    for label_name, label_id in LABELS.items():
+        label_dir = os.path.join(data_dir, label_name)
+        for file in os.listdir(label_dir):
+            if file.endswith(".wav"):
+                file_path = os.path.join(label_dir, file)
+                data.append({"path": file_path, "label": label_id})
+
+    # Convert to a Hugging Face dataset
+    ds = Dataset.from_list(data)
+
+    # Split into 80% train / 20% test
+    ds = ds.train_test_split(test_size=0.2)
+    return ds  # Contains ds["train"] and ds["test"]
+
+
+# def load_audio_data_from_csv(csv_path, data_dir):
+#     data = []
+#     df = pd.read_csv(csv_path, sep=",", header=0)
+#     print(df.head())
+
+#     for _, row in df.iterrows():
+#         file_path = os.path.join(data_dir, row["dossier"])
+#         label = row["emotion"]
+
+#         if os.path.exists(file_path) and label in LABELS:
+#             data.append({"path": file_path, "label": LABELS[label]})
+#         else:
+#             print(f"⚠️ Missing file or unknown label: {file_path} - {label}")
+
+#     return Dataset.from_list(data)
+
+# # Load the dataset from the CSV
+# csv_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "new_data", "dataset.csv"))
+# data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "new_data"))
+# ds = load_audio_data_from_csv(csv_path, data_dir)
diff --git a/utils/preprocessing.py b/utils/preprocessing.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3a5252b24bb655a24e2c60b4cde18b4859a6d10
--- /dev/null
+++ b/utils/preprocessing.py
@@ -0,0 +1,82 @@
+import librosa
+import soundfile as sf
+import torch
+import torchaudio
+import numpy as np
+from model.feature_extractor import processor  # type: ignore
+from config import DEVICE
+
+# Resampler to convert audio to 16 kHz
+resampler = torchaudio.transforms.Resample(orig_freq=48_000, new_freq=16_000)
+
+def preprocess_audio(batch):
+    speech, sample_rate = sf.read(batch["path"], dtype="float32")
+
+    # Convert to a numpy array if it is not one already
+    speech = np.array(speech, dtype=np.float32)
+
+    # Check that the format really is float32
+    if speech.dtype != np.float32:
+        raise ValueError(f"File {batch['path']} is not float32.")
+
+    # Resample if needed
+    if sample_rate != 16000:
+        speech = torch.tensor(speech).unsqueeze(0)  # Add a dimension for the resampler
+        speech = resampler(speech).squeeze(0).numpy()  # Resample and convert back to numpy
+
+    batch["speech"] = speech
+    batch["sampling_rate"] = 16000
+    return batch
+
+
+def prepare_features(batch, max_length):
+    y, sr = batch["speech"], 16000
+
+    # Make sure y is a float32 numpy array
+    if not isinstance(y, np.ndarray):
+        y = np.array(y, dtype=np.float32)
+
+    # Ensure the values really are float32
+    y = y.astype(np.float32)
+
+    # Extract the MFCCs
+    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
+
+    # Debugging: show the MFCC shape
+    # print(f"MFCC original shape: {mfcc.shape}")
+
+    # Adjust the MFCC length
+    if mfcc.shape[1] > max_length:
+        mfcc = mfcc[:, :max_length]  # Truncate if too long
+    else:
+        pad_width = max_length - mfcc.shape[1]
+        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')  # Pad if too short
+
+    # print(f"MFCC padded shape: {mfcc.shape}")
+
+    # Convert to a PyTorch tensor and store
+    batch["input_values"] = torch.tensor(mfcc.T, dtype=torch.float32)  # Transpose to get (max_length, 40)
+    return batch
+
+
+def collate_fn(batch):
+    """Assemble batches, padding the features."""
+
+    # Collect the features (MFCCs) and the labels
+    inputs = [np.array(sample["input_values"], dtype=np.float32) for sample in batch]
+    labels = torch.tensor([sample["label"] for sample in batch], dtype=torch.long)
+
+    # Check that inputs is a list of numpy arrays
+    # print(f"Input types: {[type(x) for x in inputs]}")  # Debugging
+
+    # Find the longest MFCC sequence in this batch
+    max_length = max([x.shape[0] for x in inputs])
+
+    # Zero-pad so every sample in the batch has the same length
+    padded_inputs = [np.pad(x, ((0, max_length - x.shape[0]), (0, 0)), mode="constant") for x in inputs]
+
+    # Convert to a PyTorch tensor
+    inputs_tensor = torch.tensor(padded_inputs, dtype=torch.float32)
+
+    return inputs_tensor, labels
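Because prepare_features already pads or truncates every clip to max_length, the per-batch padding in collate_fn only comes into play when samples of different lengths reach it directly. A tiny self-contained check of that collate behaviour on ragged dummy samples (the dummy batch is hypothetical; collate_fn is the function defined in utils/preprocessing.py above):

import numpy as np
import torch

batch = [
    {"input_values": np.zeros((100, 40), dtype=np.float32), "label": 0},
    {"input_values": np.zeros((128, 40), dtype=np.float32), "label": 2},
]
inputs, labels = collate_fn(batch)
print(inputs.shape)  # torch.Size([2, 128, 40]) — the shorter clip is zero-padded
print(labels)        # tensor([0, 2])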
diff --git a/views/about.py b/views/about.py
deleted file mode 100644
index 6d37b1949b2c9f16f7ae9b4d2c969a63888e53a0..0000000000000000000000000000000000000000
--- a/views/about.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import streamlit as st
-
-
-def about():
-    st.title("About")
-
-    col1, col2 = st.columns(2)
-
-    with col1:
-        st.markdown("### About")
-        st.write("This dashboard is maintained by the M2 SISE team.")
-        st.write("For more information, please visit the [GitHub repository](https://github.com/jdalfons/sise-ultimate-challenge/tree/main).")
-
-    with col2:
-        st.markdown("### Collaborators")
-        st.write("""
-        - [Falonne Kpamegan](https://github.com/marinaKpamegan)
-        - [Nancy](https://github.com/yminanc)
-        - [Cyril](https://github.com/Cyr-CK)
-        - [Juan Alfonso](https://github.com/jdalfons)
-        """)
\ No newline at end of file
diff --git a/views/application.py b/views/application.py
deleted file mode 100644
index 2b49a1f2b32055097ae45cc0b65be6b2d9a2a054..0000000000000000000000000000000000000000
--- a/views/application.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import streamlit as st
-import datetime
-import os
-
-DIRECTORY = "audios"
-FILE_NAME = "audio.wav"
-
-def application():
-    st.title("SISE ultimate challenge")
-    st.write("This is the final challenge of the SISE programme.")
-    st.markdown("""
-    **Overview:**
-    - Log analysis
-    - Data analysis
-    - Machine learning
-    """)
-
-    st.markdown("---")
-
-    tab1, tab2 = st.tabs(["Record Audio", "Register Audio"])
-
-    with tab1:
-        st.header("Record Audio")
-        st.write("Here you can record audio.")
-        audio_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "ogg"])
-        if audio_file is not None:
-
-            with open(f"audios/{FILE_NAME}", "wb") as f:
-                f.write(audio_file.getbuffer())
-            st.success(f"Saved file: {FILE_NAME}")
-
-    with tab2:
-        st.header("Register Audio")
-        st.write("Here you can register audio.")
-
-
-        file = os.path.join(DIRECTORY, FILE_NAME)
-        if os.path.exists(file):
-            st.markdown("## File registered:")
-            audio_data = st.audio(file, format='audio/wav', start_time=0)
diff --git a/views/real_time.py b/views/real_time.py
deleted file mode 100644
index 51f589c1a71614080634c7cab7e78e3754852905..0000000000000000000000000000000000000000
--- a/views/real_time.py
+++ /dev/null
@@ -1,80 +0,0 @@
-################################
-### NOT YET TESTED
-###############################
-
-import streamlit as st
-import pyaudio
-import wave
-import torch
-from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
-import numpy as np
-import time
-
-# Load a Wav2Vec2 model for emotion classification
-model_name = "superb/wav2vec2-base-superb-er"  # Example model for emotion recognition
-processor = Wav2Vec2Processor.from_pretrained(model_name)
-model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
-
-# Audio parameters
-CHUNK = 1024
-FORMAT = pyaudio.paInt16
-CHANNELS = 1
-RATE = 16000
-
-# Predict the emotion of an audio segment
-def predict_emotion(audio_data):
-    inputs = processor(audio_data, sampling_rate=RATE, return_tensors="pt", padding=True)
-    with torch.no_grad():
-        logits = model(**inputs).logits
-    predicted_id = torch.argmax(logits, dim=-1).item()
-    emotion = model.config.id2label[predicted_id]
-    return emotion
-
-# Streamlit interface
-st.title("Real-time emotion detection")
-
-# Buttons to start and stop recording
-start_button = st.button("Start recording")
-stop_button = st.button("Stop recording")
-
-# Live emotion display areas
-emotion_placeholder = st.empty()
-final_emotion_placeholder = st.empty()
-
-if start_button:
-    st.write("Recording...")
-    audio = pyaudio.PyAudio()
-    stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
-
-    frames = []
-    real_time_emotions = []
-
-    while not stop_button:
-        data = stream.read(CHUNK)
-        frames.append(data)
-
-        # Real-time processing (one-second windows)
-        if len(frames) >= RATE // CHUNK:
-            audio_segment = np.frombuffer(b''.join(frames[-(RATE // CHUNK):]), dtype=np.int16)
-            emotion = predict_emotion(audio_segment)
-            real_time_emotions.append(emotion)
-            emotion_placeholder.line_chart(real_time_emotions)  # Plot the emotion stream
-
-    # Stop recording
-    stream.stop_stream()
-    stream.close()
-    audio.terminate()
-
-    # Save the recorded audio
-    wf = wave.open("output.wav", "wb")
-    wf.setnchannels(CHANNELS)
-    wf.setsampwidth(audio.get_sample_size(FORMAT))
-    wf.setframerate(RATE)
-    wf.writeframes(b"".join(frames))
-    wf.close()
-
-    # Final prediction over the full recording
-    full_audio_data = np.frombuffer(b''.join(frames), dtype=np.int16)
-    final_emotion = predict_emotion(full_audio_data)
-
-    final_emotion_placeholder.write(f"Final predicted emotion: {final_emotion}")
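The deleted view (flagged NOT YET TESTED) hands raw int16 buffers to the processor, while Wav2Vec2 feature extractors expect float waveforms normalised to [-1, 1]; it also polls a Streamlit button inside a blocking loop, which Streamlit's rerun model does not support. A sketch of the offline equivalent, chunking a saved WAV into one-second windows; the helper name is hypothetical, predict_emotion is passed in from elsewhere, and the normalisation step is the assumed fix:

import librosa

def emotions_per_second(path, predict_emotion, rate=16000):
    # librosa returns float32 in [-1, 1], the range the processor expects.
    y, _ = librosa.load(path, sr=rate)
    return [predict_emotion(y[i:i + rate]) for i in range(0, len(y) - rate + 1, rate)]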