:rocket: feature new interface
- .gitignore +1 -1
- .streamlit/config.toml +1 -1
- app.py +243 -40
- src/config.py → config.py +1 -1
- img copy/logo_01.png +0 -0
- __init__.py → model/__init__.py +0 -0
- {src/model → model}/emotion_classifier.py +0 -0
- {src/model → model}/feature_extractor.py +1 -1
- {src/model → model}/transcriber.py +0 -0
- src/predict.py → predict.py +2 -2
- src/__init__.py +0 -0
- src/data/dataset.csv +0 -113
- src/data/processing.ipynb +0 -113
- src/model/__init__.py +0 -0
- src/predictions/feedback.csv +0 -1
- src/test_backend.ipynb +0 -63
- test_speech.py +49 -0
- src/train.py → train.py +0 -0
- train_speech.py +88 -0
- utils.py +0 -4
- {src/utils → utils}/__init__.py +0 -0
- {src/utils → utils}/dataset.py +1 -1
- {src/utils → utils}/preprocessing.py +2 -2
- views/about.py +0 -21
- views/emotion_analysis.py +0 -150
- views/real_time.py +0 -327
- views/studio.py +0 -176
.gitignore
CHANGED
@@ -180,7 +180,7 @@ old/
 *.wav
 data/*
 *.pth
-
+old/
 # Mac
 .DS_Store
 .idea
.streamlit/config.toml
CHANGED
@@ -1,4 +1,4 @@
 [theme]
-base="
+base="light"
 primaryColor="#7c99b4"
 
app.py
CHANGED
@@ -1,45 +1,248 @@
 import streamlit as st
-
-
-from views.emotion_analysis import emotion_analysis
-from views.about import about
+import pandas as pd
+import numpy as np
 import os
-import
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+import time
+import matplotlib.pyplot as plt
+from datetime import datetime
+import tempfile
+import io
+import json
+from model.transcriber import transcribe_audio
+from predict import predict_emotion
+
+# You'll need to install this package:
+# pip install streamlit-audiorec
+from st_audiorec import st_audiorec
+
+# Page configuration
+st.set_page_config(
+    page_title="Emotion Analyser",
+    page_icon="🎤",
+    layout="wide"
+)
+
+# Initialize session state variables if they don't exist
+if 'audio_data' not in st.session_state:
+    st.session_state.audio_data = []
+if 'current_audio_index' not in st.session_state:
+    st.session_state.current_audio_index = -1
+if 'audio_history_csv' not in st.session_state:
+    # Define columns for our CSV storage
+    st.session_state.audio_history_csv = pd.DataFrame(
+        columns=['timestamp', 'file_path', 'transcription', 'emotion', 'probabilities']
+    )
+if 'needs_rerun' not in st.session_state:
+    st.session_state.needs_rerun = False
+
+# Function to ensure we keep only the last 10 entries
+def update_audio_history(new_entry):
+    # Add the new entry
+    st.session_state.audio_history_csv = pd.concat([st.session_state.audio_history_csv, pd.DataFrame([new_entry])], ignore_index=True)
+
+    # Keep only the last 10 entries
+    if len(st.session_state.audio_history_csv) > 10:
+        st.session_state.audio_history_csv = st.session_state.audio_history_csv.iloc[-10:]
+
+    # Save to CSV
+    st.session_state.audio_history_csv.to_csv('audio_history.csv', index=False)
+
+# Function to process audio and get results
+def process_audio(audio_path):
+    try:
+        # Get transcription
+        transcription = transcribe_audio(audio_path)
+
+        # Get emotion prediction
+        predicted_emotion, probabilities = predict_emotion(audio_path)
+
+        # Update audio history
+        new_entry = {
+            'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            'file_path': audio_path,
+            'transcription': transcription,
+            'emotion': predicted_emotion,
+            'probabilities': str(probabilities)  # Convert dict to string for storage
+        }
+        update_audio_history(new_entry)
+
+        # Update current index
+        st.session_state.current_audio_index = len(st.session_state.audio_history_csv) - 1
+
+        return transcription, predicted_emotion, probabilities
+    except Exception as e:
+        st.error(f"Error processing audio: {str(e)}")
+        return None, None, None
+
+# Function to split audio into 10-second segments
+def split_audio(audio_file, segment_length=10):
+    # This is a placeholder - in a real implementation, you'd use a library like pydub
+    # to split the audio file into segments
+    st.warning("Audio splitting functionality is a placeholder. Implement with pydub or similar library.")
+    # For now, we'll just return the whole file as a single segment
+    return [audio_file]
+
+# Function to display emotion visualization
+def display_emotion_chart(probabilities):
+    emotions = list(probabilities.keys())
+    values = list(probabilities.values())
+
+    fig, ax = plt.subplots(figsize=(10, 5))
+    bars = ax.bar(emotions, values, color=['red', 'gray', 'green'])
+
+    # Add data labels on top of bars
+    for bar in bars:
+        height = bar.get_height()
+        ax.text(bar.get_x() + bar.get_width()/2., height + 0.02,
+                f'{height:.2f}', ha='center', va='bottom')
+
+    ax.set_ylim(0, 1.1)
+    ax.set_ylabel('Probability')
+    ax.set_title('Emotion Prediction Results')
+
+    st.pyplot(fig)
+
+# Trigger rerun if needed (replaces experimental_rerun)
+if st.session_state.needs_rerun:
+    st.session_state.needs_rerun = False
+    st.rerun()  # Using st.rerun() instead of experimental_rerun
+
+# Main App Layout
+st.image("./img/logo_01.png", width=400)
+
+# Create two columns for the main layout
+col1, col2 = st.columns([1, 1])
+
+with col1:
+    st.header("Audio Input")
+
+    # Method selection
+
+    tab1, tab2 = st.tabs(["Record Audio", "Upload Audio"])
+
+    with tab1:
+        st.write("Record your audio (max 10 seconds):")
+
+        # Using streamlit-audiorec for better recording functionality
+        wav_audio_data = st_audiorec()
+
+        if wav_audio_data is not None:
+            # Save the recorded audio to a temporary file
+            with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
+                tmp_file.write(wav_audio_data)
+                tmp_file_path = tmp_file.name
+
+            st.success("Audio recorded successfully!")
+
+            # Process button
+            if st.button("Process Recorded Audio"):
+                # Process the audio
+                with st.spinner("Processing audio..."):
+                    transcription, emotion, probs = process_audio(tmp_file_path)
+                    # Set flag for rerun instead of calling experimental_rerun
+                    if transcription is not None:
+                        st.success("Audio processed successfully!")
+                        st.session_state.needs_rerun = True
+
+    with tab2:
+        uploaded_file = st.file_uploader("Upload an audio file (WAV format)", type=['wav'])
+
+        if uploaded_file is not None:
+            # Save the uploaded file to a temporary location
+            with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
+                tmp_file.write(uploaded_file.getbuffer())
+                tmp_file_path = tmp_file.name
+
+            st.audio(uploaded_file, format="audio/wav")
+
+            # Process button
+            if st.button("Process Uploaded Audio"):
+                # Split audio into 10-second segments
+                with st.spinner("Processing audio..."):
+                    segments = split_audio(tmp_file_path)
+
+                    # Process each segment
+                    for i, segment_path in enumerate(segments):
+                        st.write(f"Processing segment {i+1}...")
+                        transcription, emotion, probs = process_audio(segment_path)
+
+                    # Set flag for rerun instead of calling experimental_rerun
+                    st.success("Audio processed successfully!")
+                    st.session_state.needs_rerun = True
+
+with col2:
+    st.header("Results")
+
+    # Display results if available
+    if st.session_state.current_audio_index >= 0 and len(st.session_state.audio_history_csv) > 0:
+        current_data = st.session_state.audio_history_csv.iloc[st.session_state.current_audio_index]
+
+        # Transcription
+        st.subheader("Transcription")
+        st.text_area("", value=current_data['transcription'], height=100, key="transcription_area")
+
+        # Emotion
+        st.subheader("Detected Emotion")
+        st.info(f"🎭 Predicted emotion: **{current_data['emotion']}**")
+
+        # Convert string representation of dict back to actual dict
+        try:
+            import ast
+            probs = ast.literal_eval(current_data['probabilities'])
+            display_emotion_chart(probs)
+        except Exception as e:
+            st.error(f"Error parsing probabilities: {str(e)}")
+            st.write(f"Raw probabilities: {current_data['probabilities']}")
+    else:
+        st.info("Record or upload audio to see results")
+
+# Audio History and Analytics Section
+st.header("Audio History and Analytics")
+
+if len(st.session_state.audio_history_csv) > 0:
+    # Display a select box to choose from audio history
+    timestamps = st.session_state.audio_history_csv['timestamp'].tolist()
+    selected_timestamp = st.selectbox(
+        "Select audio from history:",
+        options=timestamps,
+        index=len(timestamps) - 1  # Default to most recent
     )
 
+    # Update current index when selection changes
+    selected_index = st.session_state.audio_history_csv[
+        st.session_state.audio_history_csv['timestamp'] == selected_timestamp
+    ].index[0]
+
+    # Only update if different
+    if st.session_state.current_audio_index != selected_index:
+        st.session_state.current_audio_index = selected_index
+        st.session_state.needs_rerun = True
 
-
-
-
-
-
-
-
+    # Analytics button
+    if st.button("Run Analytics on Selected Audio"):
+        st.subheader("Analytics Results")
+
+        # Get the selected audio data
+        selected_data = st.session_state.audio_history_csv.iloc[selected_index]
+
+        # Display analytics (this is where you would add more sophisticated analytics)
+        st.write(f"Selected Audio: {selected_data['timestamp']}")
+        st.write(f"Emotion: {selected_data['emotion']}")
+        st.write(f"File Path: {selected_data['file_path']}")
+
+        # Add any additional analytics you want here
+
+        # Try to play the selected audio
+        try:
+            if os.path.exists(selected_data['file_path']):
+                st.audio(selected_data['file_path'], format="audio/wav")
+            else:
+                st.warning("Audio file not found - it may have been deleted or moved.")
+        except Exception as e:
+            st.error(f"Error playing audio: {str(e)}")
+else:
+    st.info("No audio history available. Record or upload audio to create history.")
+
+# Footer
+st.markdown("---")
+st.caption("Audio Emotion Analyzer - Processes audio in 10-second segments and predicts emotions")
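The new app.py leaves split_audio() as a placeholder and itself points at pydub. A minimal sketch of what that helper could look like, assuming pydub is installed (pip install pydub) and that returning temporary .wav paths is acceptable to process_audio(); this is an illustration, not part of the commit:

import tempfile
from pydub import AudioSegment

def split_audio(audio_file, segment_length=10):
    # Cut a WAV file into chunks of at most `segment_length` seconds and
    # return the paths of the temporary files holding each chunk.
    audio = AudioSegment.from_wav(audio_file)
    segment_ms = segment_length * 1000
    segment_paths = []
    for start in range(0, len(audio), segment_ms):
        chunk = audio[start:start + segment_ms]
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            chunk.export(tmp.name, format="wav")
            segment_paths.append(tmp.name)
    return segment_paths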
src/config.py → config.py
RENAMED
@@ -21,5 +21,5 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 MODEL_NAME = "facebook/wav2vec2-large-xlsr-53-french"
 
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-BEST_MODEL_NAME = os.path.join(BASE_DIR, "
+BEST_MODEL_NAME = os.path.join(BASE_DIR, "model", "fr-speech-emotion-model.pth")  # goes up one level to reach the root
 
img copy/logo_01.png
ADDED
__init__.py → model/__init__.py
RENAMED
File without changes
{src/model → model}/emotion_classifier.py
RENAMED
File without changes
{src/model → model}/feature_extractor.py
RENAMED
@@ -1,6 +1,6 @@
 import torch
 from transformers import Wav2Vec2Model, Wav2Vec2Processor
-from
+from config import MODEL_NAME, DEVICE
 
 processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
 feature_extractor = Wav2Vec2Model.from_pretrained(MODEL_NAME).to(DEVICE)
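For reference, a hedged sketch of how the processor / feature_extractor pair defined in this file might be used to pull wav2vec2 embeddings from a 16 kHz waveform; the helper name and the mean pooling are assumptions, not code from this repository:

import torch

def extract_features(waveform_16k):
    # waveform_16k: 1-D numpy array sampled at 16 kHz (assumption)
    inputs = processor(waveform_16k, sampling_rate=16_000, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        hidden_states = feature_extractor(**inputs).last_hidden_state  # (1, time, hidden)
    return hidden_states.mean(dim=1)  # simple mean pooling over time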
{src/model → model}/transcriber.py
RENAMED
File without changes
src/predict.py → predict.py
RENAMED
@@ -4,8 +4,8 @@ import torch
 import librosa
 import numpy as np
 from model.emotion_classifier import EmotionClassifier
-from
-from
+from utils.preprocessing import collate_fn
+from config import DEVICE, NUM_LABELS, BEST_MODEL_NAME
 
 # Load the trained model
 feature_dim = 40  # Number of MFCCs used
src/__init__.py
DELETED
File without changes
src/data/dataset.csv
DELETED
@@ -1,113 +0,0 @@
-dossier,emotion
-colere/c1ac.wav,colere
-colere/c1af.wav,colere
-colere/c1aj.wav,colere
-colere/c1an.wav,colere
-colere/c1bc.wav,colere
-colere/c1bf.wav,colere
-colere/c1bj.wav,colere
-colere/c1bn.wav,colere
-colere/c1cc.wav,colere
-colere/c1cf.wav,colere
-colere/c1cj.wav,colere
-colere/c2ac.wav,colere
-colere/c2af.wav,colere
-colere/c2aj.wav,colere
-colere/c2an.wav,colere
-colere/c2bc.wav,colere
-colere/c2bf.wav,colere
-colere/c2bj.wav,colere
-colere/c2bn.wav,colere
-colere/c2cn.wav,colere
-colere/c3ac.wav,colere
-colere/c3af.wav,colere
-colere/c3aj.wav,colere
-colere/c3an.wav,colere
-colere/c3bc.wav,colere
-colere/c3bf.wav,colere
-colere/c3bj.wav,colere
-colere/c3bn.wav,colere
-colere/c4aaf.wav,colere
-colere/c4ac.wav,colere
-colere/c4af.wav,colere
-colere/c4aj.wav,colere
-colere/c4an.wav,colere
-colere/c4bc.wav,colere
-colere/c4bj.wav,colere
-colere/c4bn.wav,colere
-colere/c5an.wav,colere
-colere/c5c.wav,colere
-colere/c5f.wav,colere
-colere/c5j.wav,colere
-neutre/n1ac.wav,neutre
-neutre/n1af.wav,neutre
-neutre/n1aj.wav,neutre
-neutre/n1an.wav,neutre
-neutre/n1bc.wav,neutre
-neutre/n1bf.wav,neutre
-neutre/n1bj.wav,neutre
-neutre/n1bn.wav,neutre
-neutre/n2ac.wav,neutre
-neutre/n2af.wav,neutre
-neutre/n2aj.wav,neutre
-neutre/n2an.wav,neutre
-neutre/n2bc.wav,neutre
-neutre/n2bf.wav,neutre
-neutre/n2bj.wav,neutre
-neutre/n2bn.wav,neutre
-neutre/n3ac.wav,neutre
-neutre/n3af.wav,neutre
-neutre/n3aj.wav,neutre
-neutre/n3an.wav,neutre
-neutre/n3bc.wav,neutre
-neutre/n3bf.wav,neutre
-neutre/n3bj.wav,neutre
-neutre/n3bn.wav,neutre
-neutre/n4ac.wav,neutre
-neutre/n4aj.wav,neutre
-neutre/n4an.wav,neutre
-neutre/n4f.wav,neutre
-neutre/n5ac.wav,neutre
-neutre/n5af.wav,neutre
-neutre/n5aj.wav,neutre
-neutre/n5an.wav,neutre
-neutre/n5bc.wav,neutre
-neutre/n5bf.wav,neutre
-neutre/n5bj.wav,neutre
-neutre/n5bn.wav,neutre
-joie/h1ac.wav,joie
-joie/h1af.wav,joie
-joie/h1aj.wav,joie
-joie/h1an.wav,joie
-joie/h1bc.wav,joie
-joie/h1bf.wav,joie
-joie/h1bj.wav,joie
-joie/h1bn.wav,joie
-joie/h21f.wav,joie
-joie/h2ac.wav,joie
-joie/h2aj.wav,joie
-joie/h2an.wav,joie
-joie/h2bc.wav,joie
-joie/h2bf.wav,joie
-joie/h2bj.wav,joie
-joie/h2bn.wav,joie
-joie/h3ac.wav,joie
-joie/h3af.wav,joie
-joie/h3aj.wav,joie
-joie/h3anwav.wav,joie
-joie/h3bc.wav,joie
-joie/h3bf.wav,joie
-joie/h3bj.wav,joie
-joie/h3bn.wav,joie
-joie/h4ac.wav,joie
-joie/h4af.wav,joie
-joie/h4aj.wav,joie
-joie/h4an.wav,joie
-joie/h4bc.wav,joie
-joie/h4bf.wav,joie
-joie/h4bj.wav,joie
-joie/h4bn.wav,joie
-joie/h5an.wav,joie
-joie/h5c.wav,joie
-joie/h5f.wav,joie
-joie/h5j.wav,joie
src/data/processing.ipynb
DELETED
@@ -1,113 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "RangeIndex: 112 entries, 0 to 111\n",
-      "Data columns (total 2 columns):\n",
-      " #   Column   Non-Null Count  Dtype \n",
-      "---  ------   --------------  ----- \n",
-      " 0   dossier  112 non-null    object\n",
-      " 1   emotion  112 non-null    object\n",
-      "dtypes: object(2)\n",
-      "memory usage: 1.9+ KB\n"
-     ]
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "\n",
-    "data = pd.read_csv('dataset.csv', sep=',', header=0)\n",
-    "\n",
-    "data.info()\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "             dossier emotion\n",
-      "0    colere/c1ac.wav  colere\n",
-      "1    colere/c1af.wav  colere\n",
-      "2    colere/c1aj.wav  colere\n",
-      "3    colere/c1an.wav  colere\n",
-      "4    colere/c1bc.wav  colere\n",
-      "..               ...     ...\n",
-      "107    joie/h4bn.wav    joie\n",
-      "108    joie/h5an.wav    joie\n",
-      "109     joie/h5c.wav    joie\n",
-      "110     joie/h5f.wav    joie\n",
-      "111     joie/h5j.wav    joie\n",
-      "\n",
-      "[112 rows x 2 columns]\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\Evidya\\AppData\\Local\\Temp\\ipykernel_24704\\3726049179.py:8: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n",
-      "  data = data.applymap(lambda x: x.strip() if isinstance(x, str) else x)\n"
-     ]
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "\n",
-    "\n",
-    "# 🔹 1. Supprimer les espaces des noms de colonnes\n",
-    "data.columns = data.columns.str.strip()\n",
-    "\n",
-    "# 🔹 2. Supprimer les espaces dans toutes les cellules (colonnes object)\n",
-    "data = data.applymap(lambda x: x.strip() if isinstance(x, str) else x)\n",
-    "\n",
-    "# Afficher le DataFrame corrigé\n",
-    "print(data)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# save to csv\n",
-    "data.to_csv('dataset.csv', index=False, sep=',')"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
src/model/__init__.py
DELETED
File without changes
src/predictions/feedback.csv
DELETED
@@ -1 +0,0 @@
-filepath,prediction,feedback
src/test_backend.ipynb
DELETED
@@ -1,63 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Transcription : tu as encore oublié de faire le dossier c'était hurgent nom de chien\n"
-     ]
-    }
-   ],
-   "source": [
-    "# make a transcription from audio file\n",
-    "from model.transcriber import transcribe_audio\n",
-    "import os\n",
-    "\n",
-    "base_path = os.path.abspath(os.path.join(\"data\"))\n",
-    "audio_path = os.path.join(base_path, \"colere\", \"c1af.wav\") # path to audio file\n",
-    "texte = transcribe_audio(audio_path)\n",
-    "print(f\"Transcription : {texte}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from predict import predict_emotion\n",
-    "\n",
-    "base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), \"data\"))\n",
-    "audio_file = os.path.join(base_path, \"colere\", \"c1ac.wav\")\n",
-    "emotion = predict_emotion(audio_file)\n",
-    "print(f\"🎤 L'émotion prédite est : {emotion}\")"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
test_speech.py
ADDED
@@ -0,0 +1,49 @@
+import torch
+import torchaudio
+from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
+import os
+
+# 🔹 Parameters
+MODEL_NAME = "./wav2vec2_emotion"  # Path to the saved model
+LABELS = ["colere", "joie", "neutre"]  # The classes
+
+# 🔹 Load the processor and the model
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
+model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_NAME).to(device)
+model.eval()  # Evaluation mode
+
+
+def predict_emotion(audio_path):
+    # Load the audio
+    waveform, sample_rate = torchaudio.load(audio_path)
+
+    # Preprocess the sound
+    inputs = processor(
+        waveform.squeeze().numpy(),
+        sampling_rate=sample_rate,
+        return_tensors="pt",
+        padding=True,
+        truncation=True,
+        max_length=32000  # Adjust to the duration of your files
+    )
+
+    # Send the data to the right device (CPU or GPU)
+    input_values = inputs["input_values"].to(device)
+
+    # Prediction
+    with torch.no_grad():
+        logits = model(input_values).logits
+
+    # Find the predicted emotion
+    predicted_class = torch.argmax(logits, dim=-1).item()
+
+    return LABELS[predicted_class]  # Return the corresponding label
+
+base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
+audio_file = os.path.join(base_path, "colere", "c1ac.wav")
+predicted_emotion = predict_emotion(audio_file)
+print(f"🎙️ Émotion prédite : {predicted_emotion}")
+
+
+
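test_speech.py feeds the file's native sample rate straight to the processor, while wav2vec2 checkpoints expect 16 kHz input (utils/preprocessing.py already defines a 48 kHz → 16 kHz resampler). A small, hedged sketch of a loader that resamples first; wiring it into predict_emotion() is an assumption, not part of this commit:

import torchaudio

def load_waveform_16k(audio_path):
    # Load the audio and resample to 16 kHz when the source rate differs.
    waveform, sample_rate = torchaudio.load(audio_path)
    if sample_rate != 16_000:
        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16_000)
        waveform = resampler(waveform)
    return waveform, 16_000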
src/train.py → train.py
RENAMED
File without changes
train_speech.py
ADDED
@@ -0,0 +1,88 @@
+import torch
+import torchaudio
+import os
+from datasets import Dataset, DatasetDict
+from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification, TrainingArguments, Trainer
+
+# 🔹 Parameters
+MODEL_NAME = "facebook/wav2vec2-large-xlsr-53-french"
+NUM_LABELS = 3  # Number of emotion classes
+BATCH_SIZE = 8
+EPOCHS = 10
+LEARNING_RATE = 1e-4
+MAX_LENGTH = 32000  # Adjust to the duration of your audio files
+
+# 🔹 Check whether a GPU is available
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# 🔹 Load the processor and the model
+processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
+model = Wav2Vec2ForSequenceClassification.from_pretrained(
+    MODEL_NAME,
+    num_labels=NUM_LABELS,
+    problem_type="single_label_classification"
+).to(device)
+
+# 🔹 Function to load the audio files without a CSV
+def load_audio_data(data_dir):
+    data = {"file_path": [], "label": []}
+    labels = ["colere", "joie", "neutre"]  # Adjust to your classes
+
+    for label in labels:
+        folder_path = os.path.join(data_dir, label)
+        for file in os.listdir(folder_path):
+            if file.endswith(".wav"):
+                data["file_path"].append(os.path.join(folder_path, file))
+                data["label"].append(labels.index(label))
+
+    dataset = Dataset.from_dict(data)
+    train_test_split = dataset.train_test_split(test_size=0.2)  # 80% train, 20% test
+    return DatasetDict({"train": train_test_split["train"], "test": train_test_split["test"]})
+
+# 🔹 Audio preprocessing
+def preprocess_audio(file_path):
+    waveform, sample_rate = torchaudio.load(file_path)
+    inputs = processor(
+        waveform.squeeze().numpy(),
+        sampling_rate=sample_rate,
+        return_tensors="pt",
+        padding=True,
+        truncation=True,
+        max_length=MAX_LENGTH  # ✅ fixes the earlier error
+    )
+    return inputs["input_values"][0]  # Get the preprocessed audio values
+
+# 🔹 Load and preprocess the dataset
+data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
+ds = load_audio_data(data_dir)
+
+def preprocess_batch(batch):
+    batch["input_values"] = preprocess_audio(batch["file_path"])
+    return batch
+
+ds = ds.map(preprocess_batch, remove_columns=["file_path"])
+
+# 🔹 Define the training arguments
+training_args = TrainingArguments(
+    output_dir="./wav2vec2_emotion",
+    evaluation_strategy="epoch",
+    save_strategy="epoch",
+    learning_rate=LEARNING_RATE,
+    per_device_train_batch_size=BATCH_SIZE,
+    per_device_eval_batch_size=BATCH_SIZE,
+    num_train_epochs=EPOCHS,
+    save_total_limit=2,
+    logging_dir="./logs",
+    logging_steps=10,
+)
+
+# 🔹 Define the trainer
+trainer = Trainer(
+    model=model,
+    args=training_args,
+    train_dataset=ds["train"],
+    eval_dataset=ds["test"],
+)
+
+# 🚀 Launch training
+trainer.train()
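train_speech.py evaluates every epoch but reports only the loss. A minimal accuracy metric that could be passed to the Trainer above; compute_metrics is an assumption, not part of the committed script:

import numpy as np

def compute_metrics(eval_pred):
    # eval_pred is (logits, labels) as produced by the Trainer's evaluation loop
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": float((predictions == labels).mean())}

# Hypothetical usage: Trainer(..., compute_metrics=compute_metrics)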
utils.py
DELETED
@@ -1,4 +0,0 @@
-import streamlit as st
-import datetime
-
-
{src/utils → utils}/__init__.py
RENAMED
File without changes
{src/utils → utils}/dataset.py
RENAMED
@@ -1,6 +1,6 @@
 import os
 from datasets import Dataset
-from
+from config import LABELS
 import pandas as pd
 
 def load_audio_data(data_dir):
{src/utils → utils}/preprocessing.py
RENAMED
@@ -3,8 +3,8 @@ import soundfile as sf
 import torch
 import torchaudio
 import numpy as np
-from
-from
+from model.feature_extractor import processor  # type: ignore
+from config import DEVICE
 
 # Resampler to convert to 16 kHz
 resampler = torchaudio.transforms.Resample(orig_freq=48_000, new_freq=16_000)
views/about.py
DELETED
@@ -1,21 +0,0 @@
-import streamlit as st
-
-
-def about():
-    st.title("About")
-
-    col1, col2 = st.columns(2)
-
-    with col1:
-        st.markdown("### About")
-        st.write("This dashboard is maintained by the M2 SISE team.")
-        st.write("For more information, please visit the [GitHub repository](https://github.com/jdalfons/sise-ultimate-challenge/tree/main).")
-
-    with col2:
-        st.markdown("### Collaborators")
-        st.write("""
-        - [Falonne Kpamegan](https://github.com/marinaKpamegan)
-        - [Nancy Randriamiarijaona](https://github.com/yminanc)
-        - [Cyril Kocab](https://github.com/Cyr-CK)
-        - [Juan Alfonso](https://github.com/jdalfons)
-        """)
views/emotion_analysis.py
DELETED
@@ -1,150 +0,0 @@
-import streamlit as st
-import pandas as pd
-import os
-import matplotlib.pyplot as plt
-import librosa
-from src.predict import predict_emotion
-
-DIRECTORY = "audios"
-FILE_NAME = "audio.wav"
-RATE = 16000
-
-def emotion_analysis():
-
-    st.header("❤️ Emotion Analysis")
-
-    if st.session_state.audio_file is None:
-        st.info("Please, upload or record an audio file in the studio tab")
-        st.stop()
-    else:
-        audio_file = st.session_state.audio_file
-
-    start_inference = st.button("Start emotion recogniton","inf_on_upl_btn")
-    emotion_labels = ["colere", "neutre", "joie"]
-    colors = ['#f71c1c', '#cac8c8', '#f6d60a']
-
-    if start_inference:
-        # Streamlit configuration
-        with st.spinner("Real-time emotion analysis..."):
-            # uploaded_file = st.file_uploader("Choisissez un fichier audio", type=["wav", "mp3"])
-
-            if audio_file is not None:
-                # Load and resample the audio
-                audio, sr = librosa.load(audio_file, sr=RATE)
-                # chunk = audio_file
-
-                # Sliding-window parameters
-                window_size = 1  # 1 second of data
-                hop_length = 0.5  # 0.5 seconds of overlap
-
-                # Create a real-time chart
-                fig, ax = plt.subplots()
-                lines = [ax.plot([], [], label=emotion)[0] for emotion in emotion_labels]
-                ax.set_ylim(0, 1)
-                ax.set_xlim(0, len(audio) / sr)
-                ax.set_xlabel("Temps (s)")
-                ax.set_ylabel("Probabilité")
-
-                chart = st.pyplot(fig)
-
-                scores = [[],[],[]]  # 3 emotions for now
-
-                # Sliding-window processing
-                for i in range(0, len(audio), int(hop_length * sr)):
-                    chunk = audio[i:i + int(window_size * sr)]
-                    if len(chunk) < int(window_size * sr):
-                        break
-
-                    emotion_scores = predict_emotion(chunk, output_probs=True, sampling_rate=RATE)
-
-                    # Update the chart
-                    for emotion, line in zip(emotion_labels, lines):
-                        xdata = list(line.get_xdata())
-                        ydata = list(line.get_ydata())
-                        colour = colors[list(emotion_scores).index(emotion)]
-                        xdata.append(i / sr)
-                        ydata.append(emotion_scores[emotion])
-                        scores[list(emotion_scores).index(emotion)].append(emotion_scores[emotion])
-                        line.set_data(xdata, ydata)
-                        line.set_color(colour)
-
-                    ax.relim()
-                    ax.autoscale_view()
-                    ax.legend()
-                    chart.pyplot(fig, use_container_width=True)
-
-                # Prepare the styling
-                st.markdown("""
-                <style>
-                .colored-box {
-                    padding: 10px;
-                    border-radius: 5px;
-                    color: white;
-                    font-weight: bold;
-                    text-align: center;
-                }
-                </style>
-                """
-                , unsafe_allow_html=True)
-
-                # Dynamically create the specified number of columns
-                columns = st.columns(len(emotion_scores))
-
-                # emotion_scores_mean = [sum(sublist) / len(sublist) for sublist in scores]
-                emotion_scores_mean = {emotion:sum(sublist) / len(sublist) for emotion, sublist in zip(emotion_labels, scores)}
-                max_emo = max(emotion_scores_mean)
-                emotion_scores_sorted = dict(sorted(emotion_scores_mean.items(), key=lambda x: x[1], reverse=True))
-                colors_sorted = [colors[list(emotion_scores_mean.keys()).index(key)] for key in list(emotion_scores_sorted.keys())]
-
-                # Add content to each column
-                for i, (col, emotion) in enumerate(zip(columns, emotion_scores_sorted)):
-                    color = colors_sorted[i % len(colors_sorted)]  # Cycle through colors if more columns than colors
-                    col.markdown(f"""
-                    <div class="colored-box" style="background-color: {color};">
-                        {emotion} : {100*emotion_scores_sorted[emotion]:.2f} %
-                    </div>
-                    """
-                    , unsafe_allow_html=True)
-
-
-
-                st.success("Analyse terminée !")
-            else:
-                st.warning("You need to load an audio file !")
-
-    if start_inference:
-
-        st.subheader("Feedback")
-
-        # Initialize the CSV file
-        csv_file = os.path.join("src","predictions","feedback.csv")
-
-        # Check whether the CSV file exists, otherwise create it with the appropriate columns
-        if not os.path.exists(csv_file):
-            df = pd.DataFrame(columns=["filepath", "prediction", "feedback"])
-            df.to_csv(csv_file, index=False)
-
-        # Load the existing data from the CSV
-        df = pd.read_csv(csv_file)
-
-        with st.form("feedback_form"):
-            st.write("What should have been the correct prediction ? (*Choose the same emotion if the prediction was correct*).")
-            feedback = st.selectbox("Your answer :", ['Sadness','Anger', 'Disgust', 'Fear', 'Surprise', 'Joy', 'Neutral'])
-            submit_button = st.form_submit_button("Submit")
-            st.write("En cliquant sur ce bouton, vous acceptez que votre audio soit sauvegardé dans notre base de données.")
-
-        if submit_button:
-            # Add the feedback to the DataFrame
-            new_entry = pd.DataFrame([{"filepath": audio_file.name, "prediction": max_emo, "feedback": feedback}])
-            # df = df.append(new_entry, ignore_index=True)
-            df = pd.concat([df, new_entry], ignore_index=True)
-
-            # Save the updated data to the CSV file
-            df.to_csv(csv_file, index=False)
-
-            # Save the audio file
-            with open(os.path.join("src","predictions","data",audio_file.name), "wb") as f:
-                f.write(audio_file.getbuffer())
-
-            # Confirmation for the user
-            st.success("Merci pour votre retour ! Vos données ont été sauvegardées.")
views/real_time.py
DELETED
@@ -1,327 +0,0 @@
-################################
-### Real time prediction for real time record
-###############################
-
-import streamlit as st
-import pyaudio
-import wave
-import torch
-from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
-import matplotlib.pyplot as plt
-import numpy as np
-import time
-
-# Audio parameters
-CHUNK = 1024
-FORMAT = pyaudio.paInt16
-CHANNELS = 1
-RATE = 16000
-
-# Streamlit interface
-st.title("Détection des émotions en temps réel")
-
-# Buttons to start and stop the recording
-start_button = st.button("Démarrer l'enregistrement")
-stop_button = st.button("Arrêter l'enregistrement")
-
-# Area for displaying emotions in real time
-emotion_placeholder = st.empty()
-final_emotion_placeholder = st.empty()
-
-if start_button:
-    st.write("Enregistrement en cours...")
-    audio = pyaudio.PyAudio()
-    stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
-
-    frames = []
-    real_time_emotions = []
-
-    while not stop_button:
-        data = stream.read(CHUNK)
-        frames.append(data)
-
-        # Real-time processing (in 1-second slices)
-        if len(frames) >= RATE // CHUNK:
-            audio_segment = np.frombuffer(b''.join(frames[-(RATE // CHUNK):]), dtype=np.int16)
-            emotion = predict_emotion(audio_segment, output_probs=False, sampling_rate=RATE)
-            real_time_emotions.append(emotion)
-            emotion_placeholder.line_chart(real_time_emotions)  # Plot the emotions over time
-
-    # Stop the recording
-    stream.stop_stream()
-    stream.close()
-    audio.terminate()
-
-    # Save the recorded audio
-    wf = wave.open("output.wav", "wb")
-    wf.setnchannels(CHANNELS)
-    wf.setsampwidth(audio.get_sample_size(FORMAT))
-    wf.setframerate(RATE)
-    wf.writeframes(b"".join(frames))
-    wf.close()
-
-    # Final prediction on the whole recorded audio
-    full_audio_data = np.frombuffer(b''.join(frames), dtype=np.int16)
-    final_emotion = predict_emotion(full_audio_data)
-
-    final_emotion_placeholder.write(f"Émotion finale prédite : {final_emotion}")
-
-
-################################
-### Real time prediction for uploaded audio file
-###############################
-# Load the wav2vec model and the processor
-
-# # Configuration Streamlit
-# st.title("Analyse des émotions en temps réel")
-# uploaded_file = st.file_uploader("Choisissez un fichier audio", type=["wav", "mp3"])
-
-# if uploaded_file is not None:
-#     # Charger et rééchantillonner l'audio
-#     audio, sr = librosa.load(uploaded_file, sr=16000)
-
-#     # Paramètres de la fenêtre glissante
-#     window_size = 1 # en secondes
-#     hop_length = 0.5 # en secondes
-
-#     # Créer un graphique en temps réel
-#     fig, ax = plt.subplots()
-#     lines = [ax.plot([], [], label=emotion)[0] for emotion in emotions]
-#     ax.set_ylim(0, 1)
-#     ax.set_xlim(0, len(audio) / sr)
-#     ax.set_xlabel("Temps (s)")
-#     ax.set_ylabel("Probabilité")
-#     ax.legend()
-
-#     chart = st.pyplot(fig)
-
-#     # Traitement par fenêtre glissante
-#     for i in range(0, len(audio), int(hop_length * sr)):
-#         chunk = audio[i:i + int(window_size * sr)]
-#         if len(chunk) < int(window_size * sr):
-#             break
-
-#         emotion_scores = predict_emotion(chunk, output_probs=False, sampling_rate=RATE)
-
-#         # Mettre à jour le graphique
-#         for emotion, line in zip(emotions, lines):
-#             xdata = line.get_xdata().tolist()
-#             ydata = line.get_ydata().tolist()
-#             xdata.append(i / sr)
-#             ydata.append(emotion_scores[emotion])
-#             line.set_data(xdata, ydata)
-
-#         ax.relim()
-#         ax.autoscale_view()
-#         chart.pyplot(fig)
-
-#     st.success("Analyse terminée !")
-
-
-
-
-
-
-
-############################################
-### Progress bar
-############################################
-
-with st.status("Downloading data...", expanded=True) as status:
-    st.write("Searching for data...")
-    time.sleep(2)
-    st.write("Found URL.")
-    time.sleep(1)
-    st.write("Downloading data...")
-    time.sleep(1)
-    status.update(
-        label="Download complete!", state="complete", expanded=False
-    )
-
-st.button("Rerun")
-
-
-############################################
-### Time duration estimation
-############################################
-progress_bar = st.progress(0)
-time_placeholder = st.empty()
-
-total_time = 10  # Total estimated time in seconds
-for i in range(total_time):
-    # Update progress bar
-    progress_bar.progress((i + 1) / total_time)
-
-    # Update time estimation
-    remaining_time = total_time - i - 1
-    time_placeholder.text(f"Estimated time remaining: {remaining_time} seconds")
-
-    # Simulate task progress
-    time.sleep(1)
-
-
-
-############################################
-### Audio file noise reduction
-############################################
-from pydub import AudioSegment
-import noisereduce as nr
-from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
-
-# Noise-reduction function
-def reduce_noise(audio_data, sr):
-    reduced_noise = nr.reduce_noise(y=audio_data, sr=sr)
-    return reduced_noise
-
-# Load the wav2vec model
-processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
-model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
-
-# Streamlit interface
-st.title("Application de transcription audio avec réduction de bruit")
-
-uploaded_file = st.file_uploader("Choisissez un fichier audio .wav", type="wav")
-
-if uploaded_file is not None:
-    # Load and preprocess the audio
-    audio = AudioSegment.from_wav(uploaded_file)
-    audio_array = np.array(audio.get_array_of_samples())
-
-    # Noise reduction
-    reduced_noise_audio = reduce_noise(audio_array, audio.frame_rate)
-
-    # Processing with wav2vec
-    input_values = processor(reduced_noise_audio, sampling_rate=audio.frame_rate, return_tensors="pt").input_values
-
-    with torch.no_grad():
-        logits = model(input_values).logits
-
-    predicted_ids = torch.argmax(logits, dim=-1)
-    transcription = processor.batch_decode(predicted_ids)[0]
-
-    st.audio(uploaded_file, format="audio/wav")
-    st.write("Transcription:")
-    st.write(transcription)
-
-
-############################################
-### Emotion selection
-############################################
-# options = ['Sadness','Anger', 'Disgust', 'Fear', 'Surprise', 'Joy','Neutral']
-# selected_options = st.multiselect('What emotions do you want to be displayed', options, default=['Joy', 'Anger','Neutral])
-
-
-############################################
-### Transcription Speech2Text
-############################################
-# # Fonction pour transcrire l'audio
-# def transcribe_audio(audio):
-#     # Préparer les données d'entrée pour le modèle
-#     input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
-
-#     # Passer les données dans le modèle pour obtenir les logits
-#     with torch.no_grad():
-#         logits = model(input_values).logits
-
-#     # Décoder les prédictions en texte
-#     predicted_ids = torch.argmax(logits, dim=-1)
-#     transcription = processor.batch_decode(predicted_ids)[0]
-#     return transcription
-
-# # Charger et transcrire l'audio
-# # audio, rate = load_audio(audio_file_path) # (re)chargement de l'audio si nécessaire
-# transcription = transcribe_audio(audio)
-
-# # Afficher la transcription
-# print("Transcription :", transcription)
-
-
-############################################
-### Feedback
-############################################
-import pandas as pd
-import os
-
-# Initialize the CSV file
-csv_file = "predictions/feedback.csv"
-
-# Check whether the CSV file exists, otherwise create it with the appropriate columns
-if not os.path.exists(csv_file):
-    df = pd.DataFrame(columns=["filepath", "prediction", "feedback"])
-    df.to_csv(csv_file, index=False)
-
-# Load the existing data from the CSV
-df = pd.read_csv(csv_file)
-
-# Streamlit interface
-st.title("Predicted emotion feedback")
-
-# Simulate a prediction for the example (replace with your real model)
-audio_file_name = "example_audio.wav"
-predicted_emotion = "Joie"  # Example prediction
-
-st.write(f"Fichier audio : {audio_file_name}")
-st.write(f"Émotion détectée : {predicted_emotion}")
-
-# Feedback form
-with st.form("feedback_form"):
-    st.write("Est-ce la bonne émotion qui a été détectée ? Cochez la réelle émotion.")
-    feedback = st.selectbox("Votre réponse :", ['Sadness','Anger', 'Disgust', 'Fear', 'Surprise', 'Joy', 'Neutral'])
-    submit_button = st.form_submit_button("Soumettre")
-    st.write("En cliquant sur ce bouton, vous acceptez que votre audio soit sauvegardé dans notre base de données.")
-
-if submit_button:
-    # Add the feedback to the DataFrame
-    new_entry = {"filepath": audio_file_name, "prediction": predicted_emotion, "feedback": feedback}
-    df = df.append(new_entry, ignore_index=True)
-
-    # Save the updated data to the CSV file
-    df.to_csv(csv_file, index=False)
-
-    # Save the audio file
-    with open("predictions/data", "wb") as f:
-        f.write(uploaded_file.getbuffer())
-
-    # Confirmation for the user
-    st.success("Merci pour votre retour ! Vos données ont été sauvegardées.")
-
-    # Display the saved data (optional)
-    # st.write("Données collectées jusqu'à présent :")
-    # st.dataframe(df)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-############################################
-### Predict proba (to replace in predict.py)
-############################################
-import librosa
-def predict_emotion_probabilities(audio_path):
-    waveform, _ = librosa.load(audio_path, sr=16000)
-    input_values = processor(waveform, return_tensors="pt", sampling_rate=16000).input_values
-    input_values = input_values.to(device)
-
-    with torch.no_grad():
-        outputs = model(input_values)
-
-    # Apply softmax to get probabilities
-    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
-
-    # Convert to a numpy array and take the first (and only) element
-    probabilities = probabilities[0].detach().cpu().numpy()
-
-    # Create a dictionary mapping each emotion to its probability
-    emotion_probabilities = {emotion: prob for emotion, prob in zip(emotion_labels, probabilities)}
-
-    return emotion_probabilities
views/studio.py
DELETED
@@ -1,176 +0,0 @@
-import streamlit as st
-from st_audiorec import st_audiorec
-
-from src.model.transcriber import transcribe_audio
-
-
-def studio():
-    st.title("SISE ultimate challenge")
-    st.write("C'est le dernier challenge de la formation SISE.")
-    st.markdown("""
-    **Overview:**
-    - Analyse de logs
-    - Analyse de données
-    - Machine learning
-    """)
-
-    st.markdown("---")
-
-    st.header("🎧 Audio File Studio")
-
-    tab1, tab2, tab3 = st.tabs(["⬆️ Record Audio", "🔈 Realtime Audio", "📝 Transcription"])
-
-    with tab1:
-        st.header("⬆️ Upload Audio Record")
-        st.write("Here you can upload a pre-recorded audio.")
-        audio_file = st.file_uploader("Upload an audio file", type=["wav"])
-
-        if "audio_file" not in st.session_state:
-            st.session_state.audio_file = None
-
-        if audio_file is not None:
-            st.success("Audio file uploaded successfully !")
-            st.session_state.audio_file = audio_file
-
-            # with open(os.path.join(DIRECTORY,FILE_NAME), "wb") as f:
-            #     f.write(audio_file.getbuffer())
-            # st.success(f"Saved file: {FILE_NAME}")
-
-
-
-    with tab2:
-        st.header("🔈 Realtime Audio Record")
-        st.write("Here you can record an audio.")
-
-        if "audio_file" not in st.session_state:
-            st.session_state.audio_file = None
-
-        audio_file = st_audiorec()
-
-        if audio_file is not None:
-            st.audio(audio_file, format='audio/wav')
-            st.success("Audio recorded successfully !")
-            st.session_state.audio_file = audio_file
-
-        ##############################################"realtime audio record"##############################################
-        # Buttons to start and stop the recording
-        # start_button = st.button("Démarrer l'enregistrement")
-        # stop_button = st.button("Arrêter l'enregistrement")
-        # start_stop = st.button("Démarrer/Arrêter l'enregistrement")
-
-
-        # Area for displaying emotions in real time
-        # emotion_placeholder = st.empty()
-        # final_emotion_placeholder = st.empty()
-        # audio = pyaudio.PyAudio()
-        # audio_buffer = np.array([])
-        # emotion_prediction = "Aucune prédiction"
-        # is_recording = False
-
-        # if start_stop:
-        #     is_recording = not is_recording
-
-        # # Variables globales pour le partage de données entre threads
-        # def audio_callback(in_data, frame_count, time_info, status):
-        #     global audio_buffer
-        #     audio_data = np.frombuffer(in_data, dtype=np.float32)
-        #     audio_buffer = np.concatenate((audio_buffer, audio_data))
-        #     return (in_data, pyaudio.paContinue)
-
-        # def predict_emotion_thread():
-        #     global audio_buffer, emotion_prediction
-        #     while is_recording:
-        #         if len(audio_buffer) >= CHUNK:
-        #             chunk = audio_buffer[:CHUNK]
-        #             audio_buffer = audio_buffer[STRIDE:]
-        #             emotion_prediction = predict_emotion(chunk, output_probs=False, sampling_rate=RATE) # Utilisez votre modèle ici
-        #             # time.sleep(0.1)
-
-        # if is_recording:
-        #     audio_buffer = np.array([])
-        #     stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True,
-        #                         frames_per_buffer=CHUNK, stream_callback=audio_callback)
-        #     stream.start_stream()
-        #     threading.Thread(target=predict_emotion_thread, daemon=True).start()
-        #     st.write("Enregistrement en cours...")
-        # else:
-        #     stream.stop_stream()
-        #     stream.close()
-        #     st.write("Enregistrement arrêté.")
-
-        # emotion_display = st.empty()
-
-        # while is_recording:
-        #     emotion_display.write(f"Émotion détectée : {emotion_prediction}")
-        #     # time.sleep(0.1)
-
-        # audio.terminate(
-
-
-        # stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
-
-        # frames = []
-        # real_time_emotions = []
-
-        # while not stop_button:
-        #     data = stream.read(CHUNK)
-        #     frames.append(data)
-
-        #     # Traitement en temps réel (par tranche de 1 seconde)
-        #     if len(frames) >= RATE // CHUNK:
-        #         audio_segment = np.frombuffer(b''.join(frames[-(RATE // CHUNK):]), dtype=np.int16)
-        #         emotion = predict_emotion(audio_segment, output_probs=False, sampling_rate=RATE)
-        #         real_time_emotions.append(emotion)
-        #         emotion_placeholder.line_chart(real_time_emotions) # Affichage graphique des émotions
-
-        #     # Arrêt de l'enregistrement
-        #     stream.stop_stream()
-        #     stream.close()
-        #     audio.terminate()
-
-        #     # Sauvegarde de l'audio enregistré
-        #     wf = wave.open("output.wav", "wb")
-        #     wf.setnchannels(CHANNELS)
-        #     wf.setsampwidth(audio.get_sample_size(FORMAT))
-        #     wf.setframerate(RATE)
-        #     wf.writeframes(b"".join(frames))
-        #     wf.close()
-
-        #     # Prédiction finale sur tout l'audio enregistré
-        #     full_audio_data = np.frombuffer(b''.join(frames), dtype=np.int16)
-        #     final_emotion = predict_emotion(full_audio_data)
-
-        #     final_emotion_placeholder.write(f"Émotion finale prédite : {final_emotion}")
-
-
-        ##############################################"end realtime audio record"##############################################
-
-    with tab3:
-        st.header("📝 Speech2Text Transcription")
-        st.write("Here you can get the audio transcript.")
-
-        save = st.checkbox("Save transcription to .txt", value=False, key="save-transcript")
-
-        ############################# To uncomment once this is debugged
-        if st.button("Transcribe", key="transcribe-button"):
-            # Function to transcribe the audio
-            transcription = transcribe_audio(st.audio)
-
-            # Load and transcribe the audio
-            # audio, rate = load_audio(audio_file_path) # (re)chargement de l'audio si nécessaire
-            transcription = transcribe_audio(audio_file, sampling_rate=16000)
-
-            # Display the transcription
-            st.write("Transcription :", transcription)
-
-            st.success("Audio registered successfully.")
-            if save:
-                file_path = "transcript.txt"
-
-                # Write the text to the file
-                with open(file_path, "w") as file:
-                    file.write(transcription)
-
-                st.success(f"Text saved to {file_path}")
-
-