Cyr-CK committed
Commit 233adb5 · 1 Parent(s): cfd1552

Updated app

__init__.py ADDED
File without changes
app.py CHANGED
@@ -1,6 +1,7 @@
 import streamlit as st
 from streamlit_option_menu import option_menu
-from views.application import application
+from views.studio import studio
+from views.emotion_analysis import emotion_analysis
 from views.about import about
 
 if "model_loaded" not in st.session_state:
@@ -17,8 +18,8 @@ with st.sidebar:
     # Navigation menu with icons
     selected_tab = option_menu(
         menu_title=None,  # Added menu_title parameter
-        options=["Application", "About"],
-        icons=["robot", "bar-chart", "robot"],
+        options=["Studio", "Emotion Analysis", "About"],
+        icons=["record-circle", "robot", "info-circle"],
         menu_icon="cast",
         default_index=0,
         # styles={
@@ -30,8 +31,10 @@ with st.sidebar:
     )
 
 
-if selected_tab == "Application":
-    application()
+if selected_tab == "Studio":
+    studio()
+elif selected_tab == "Emotion Analysis":
+    emotion_analysis()
 elif selected_tab == "About":
     about()
 
src/__init__.py ADDED
File without changes
src/config.py CHANGED
@@ -11,6 +11,7 @@ if not HF_API_KEY:
 
 # Emotion labels
 LABELS = {"colere": 0, "neutre": 1, "joie": 2}
+LABELS = ["colere", "neutre", "joie"]
 NUM_LABELS = len(LABELS)
 
 # Choose the device
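Note that the added list assignment immediately shadows the dict on the line above, so `LABELS` ends up as a plain list (which is what the refactored `src/predict.py` indexes into) and `NUM_LABELS` is still 3. If both the name-to-index mapping and the ordered names are wanted, a minimal sketch of one way to keep them in sync (hypothetical refactor, not part of the commit):

    # Hypothetical refactor: derive the ordered name list from the dict.
    LABELS_MAP = {"colere": 0, "neutre": 1, "joie": 2}  # name -> index
    LABELS = list(LABELS_MAP)                           # index -> name, insertion-ordered
    NUM_LABELS = len(LABELS)                            # 3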
src/model/__init__.py CHANGED
@@ -1 +0,0 @@
-
src/model/emotion_classifier.py CHANGED
@@ -1,15 +1,32 @@
+import torch
 import torch.nn as nn
 
+# Predicts roughly 33% everywhere (in the 3-class case)
+
+# class EmotionClassifier(nn.Module):
+#     def __init__(self, feature_dim, num_labels):
+#         super(EmotionClassifier, self).__init__()
+#         self.fc1 = nn.Linear(feature_dim, 256)
+#         self.relu = nn.ReLU()
+#         self.dropout = nn.Dropout(0.3)
+#         self.fc2 = nn.Linear(256, num_labels)
+
+#     def forward(self, x):
+#         x = self.fc1(x)
+#         x = self.relu(x)
+#         x = self.dropout(x)
+#         return self.fc2(x)
+
+
+
 class EmotionClassifier(nn.Module):
-    def __init__(self, feature_dim, num_labels):
+    def __init__(self, feature_dim, num_labels=3):
         super(EmotionClassifier, self).__init__()
-        self.fc1 = nn.Linear(feature_dim, 256)
-        self.relu = nn.ReLU()
-        self.dropout = nn.Dropout(0.3)
-        self.fc2 = nn.Linear(256, num_labels)
-
-    def forward(self, x):
-        x = self.fc1(x)
-        x = self.relu(x)
-        x = self.dropout(x)
-        return self.fc2(x)
+        self.fc = nn.Linear(feature_dim.config.hidden_size, num_labels)
+        self.softmax = nn.Softmax(dim=1)
+
+    def forward(self, input_values):
+        outputs = self(input_values).last_hidden_state
+        pooled_output = torch.mean(outputs, dim=1)
+        logits = self.fc(pooled_output)
+        return self.softmax(logits)
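As committed, this new head cannot run: `forward` calls `self(input_values)`, which recursively invokes `forward` itself, and `nn.Linear(feature_dim.config.hidden_size, ...)` assumes `feature_dim` is the backbone model object, whereas `src/predict.py` passes the integer `feature_extractor.config.hidden_size` and already mean-pools the features before calling the classifier. A minimal runnable sketch of what the head appears to intend (hypothetical correction, not part of the commit):

    import torch
    import torch.nn as nn

    class EmotionClassifier(nn.Module):
        """Linear head over mean-pooled Wav2Vec2 features (hypothetical fix)."""

        def __init__(self, hidden_size: int, num_labels: int = 3):
            super().__init__()
            self.fc = nn.Linear(hidden_size, num_labels)

        def forward(self, features: torch.Tensor) -> torch.Tensor:
            # features: (batch, hidden_size), already pooled by the caller
            # (src/predict.py takes last_hidden_state.mean(dim=1)).
            # Return raw logits; predict.py applies softmax itself when it
            # needs probabilities, so baking softmax in here would apply it twice.
            return self.fc(features)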
 
 
src/model/{feature_extrator.py → feature_extractor.py} RENAMED
@@ -1,6 +1,6 @@
 import torch
 from transformers import Wav2Vec2Model, Wav2Vec2Processor
-from config import MODEL_NAME, DEVICE
+from src.config import MODEL_NAME, DEVICE
 
 processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
 feature_extractor = Wav2Vec2Model.from_pretrained(MODEL_NAME).to(DEVICE)
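With the module renamed and the imports made package-absolute (`src.config` instead of `config`), these files now assume they are executed from the repository root with `src` importable as a package, for example via `python -m src.predict`. That invocation is an assumption on our part; the commit itself does not document how the modules are meant to be run.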
src/predict.py CHANGED
@@ -1,47 +1,63 @@
 import torch
 import torchaudio
+import librosa
 import soundfile as sf
 import numpy as np
-from model.emotion_classifier import EmotionClassifier
-from model.feature_extrator import feature_extractor, processor
-from utils.preprocessing import resampler
-from config import DEVICE, LABELS
+from src.model.emotion_classifier import EmotionClassifier
+from src.model.feature_extractor import feature_extractor, processor
+from src.utils.preprocessing import resampler
+from src.config import DEVICE, LABELS
 import os
 
 
 # Load the saved model
 classifier = EmotionClassifier(feature_extractor.config.hidden_size, len(LABELS)).to(DEVICE)
-classifier.load_state_dict(torch.load(os.path.join("best_emotion_model.pth"), map_location=torch.device(DEVICE)))
+classifier.load_state_dict(torch.load(os.path.join("src", "model", "best_emotion_model.pth"), map_location=torch.device(DEVICE)), strict=False)
 classifier.eval()
 
 
 # Prediction function
-def predict_emotion(audio_path):
+def predict_emotion(speech, output_probs=False, sampling_rate=16000):
     # Load the audio
-    speech, sample_rate = sf.read(audio_path, dtype="float32")
+    # waveform, sample_rate = librosa.load(speech, sr=None)
+    # speech_audio, sample_rate = sf.read(speech, dtype="float32")
 
     # Resample if necessary
-    if sample_rate != 16000:
-        speech = torch.tensor(speech).unsqueeze(0)
-        speech = resampler(speech).squeeze(0).numpy()
+    # if sample_rate != sampling_rate:
+    #     speech = torch.tensor(speech).unsqueeze(0)
+    #     speech = resampler(speech).squeeze(0).numpy()
 
     # Extract the features
-    inputs = processor(speech, sampling_rate=16000, return_tensors="pt", padding=True)
+    inputs = processor(speech, sampling_rate=sampling_rate, return_tensors="pt", padding=True)
     input_values = inputs.input_values.to(DEVICE)
 
     with torch.no_grad():
         features = feature_extractor(input_values).last_hidden_state.mean(dim=1)
         logits = classifier(features)
 
-    # Get the prediction
-    predicted_label = torch.argmax(logits, dim=-1).item()
-    emotion = list(LABELS.keys())[predicted_label]
-
-    return emotion
+    if output_probs:
+        # Apply softmax to get probabilities
+        probabilities = torch.nn.functional.softmax(logits, dim=-1)
+
+        # Convert to a numpy array and take the first (and only) element
+        probabilities = probabilities[0].detach().cpu().numpy()
+
+        # Build a dict mapping each emotion to its probability
+        emotion_probabilities = {emotion: prob for emotion, prob in zip(LABELS, probabilities)}
+        # emotion_probabilities = {"emotions": [emotion for emotion in emotion_labels],
+        #                          "probabilities": [prob for prob in probabilities]}
+        return emotion_probabilities
+    else:
+        # Get the most probable emotion (i.e. the prediction)
+        predicted_label = torch.argmax(logits, dim=-1).item()
+        emotion = LABELS[predicted_label]
+
+        return emotion
+
 
 # Usage example
-if __name__ == "__main__":
-    base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
-    audio_file = os.path.join(base_path, "colere", "c1ac.wav")
-    emotion = predict_emotion(audio_file)
-    print(f"🎤 L'émotion prédite est : {emotion}")
+# if __name__ == "__main__":
+#     base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
+#     audio_file = os.path.join(base_path, "colere", "c1ac.wav")
+#     emotion = predict_emotion(audio_file)
+#     print(f"🎤 L'émotion prédite est : {emotion}")
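With the new signature, callers pass a raw waveform array rather than a file path; the views load the audio themselves with librosa. A minimal usage sketch, assuming a 16 kHz mono recording (the file name is illustrative):

    import librosa
    from src.predict import predict_emotion

    # Load and resample to the 16 kHz rate the Wav2Vec2 processor expects.
    audio, sr = librosa.load("recording.wav", sr=16000)

    label = predict_emotion(audio, sampling_rate=sr)                     # e.g. "joie"
    probs = predict_emotion(audio, output_probs=True, sampling_rate=sr)  # {"colere": ..., "neutre": ..., "joie": ...}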
src/utils/preprocessing.py CHANGED
@@ -2,8 +2,8 @@ import soundfile as sf
 import torch
 import torchaudio
 import numpy as np
-from model.feature_extrator import processor
-from config import DEVICE
+from src.model.feature_extractor import processor  # type: ignore
+from src.config import DEVICE
 
 # Resampler
 resampler = torchaudio.transforms.Resample(orig_freq=48_000, new_freq=16_000)
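The module-level resampler is fixed at 48 kHz in, 16 kHz out. A short sketch of how it applies to a waveform tensor (shapes assumed for illustration):

    import torch
    import torchaudio

    resampler = torchaudio.transforms.Resample(orig_freq=48_000, new_freq=16_000)

    waveform_48k = torch.randn(1, 48_000)   # (channels, samples): one second at 48 kHz
    waveform_16k = resampler(waveform_48k)  # -> shape (1, 16000)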
views/about.py CHANGED
@@ -15,7 +15,7 @@ def about():
     st.markdown("### Collaborators")
     st.write("""
     - [Falonne Kpamegan](https://github.com/marinaKpamegan)
-    - [Nancy](https://github.com/yminanc)
-    - [Cyril](https://github.com/Cyr-CK)
+    - [Nancy Randriamiarijaona](https://github.com/yminanc)
+    - [Cyril Kocab](https://github.com/Cyr-CK)
     - [Juan Alfonso](https://github.com/jdalfons)
     """)
views/application.py DELETED
@@ -1,212 +0,0 @@
-import streamlit as st
-import pandas as pd
-from st_audiorec import st_audiorec
-import datetime
-import os
-import matplotlib.pyplot as plt
-import librosa
-from src.model.transcriber import transcribe_audio
-from predict import predict_emotion
-
-
-DIRECTORY = "audios"
-FILE_NAME = "audio.wav"
-CHUNK = 1024
-# FORMAT = pyaudio.paInt16
-CHANNELS = 1
-RATE = 16000
-
-def application():
-    st.title("SISE ultimate challenge")
-    st.write("C'est le dernier challenge de la formation SISE.")
-    st.markdown("""
-    **Overview:**
-    - Analyse de logs
-    - Analyse de données
-    - Machine learning
-    """)
-
-    st.markdown("---")
-
-    tab1, tab2, tab3 = st.tabs(["⬆️ Record Audio", "🔈 Realtime Audio", "📝 Transcription"])
-
-    with tab1:
-        st.header("⬆️ Upload Audio Record")
-        st.write("Here you can upload a pre-recorded audio.")
-        audio_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "ogg"])
-
-        if audio_file is not None:
-
-            with open(os.path.join(DIRECTORY, FILE_NAME), "wb") as f:
-                f.write(audio_file.getbuffer())
-                st.success(f"Saved file: {FILE_NAME}")
-
-
-        start_inference = st.button("Start emotion recogniton", "inf_on_upl_btn")
-        emotion_labels = ["joie", "colère", "neutre"]
-        colors = ['#f6d60a', '#f71c1c', '#cac8c8']
-
-        if start_inference:
-            # Streamlit setup
-            with st.spinner("Real-time emotion analysis..."):
-                # uploaded_file = st.file_uploader("Choisissez un fichier audio", type=["wav", "mp3"])
-
-                if audio_file is not None:
-                    # Load and resample the audio
-                    audio, sr = librosa.load(audio_file, sr=RATE)
-                    # chunk = audio_file
-
-                    # Sliding-window parameters
-                    window_size = 1  # in seconds
-                    hop_length = 0.5  # in seconds
-
-                    # Create a live-updating chart
-                    fig, ax = plt.subplots()
-                    lines = [ax.plot([], [], label=emotion)[0] for emotion in emotion_labels]
-                    ax.set_ylim(0, 1)
-                    ax.set_xlim(0, len(audio) / sr)
-                    ax.set_xlabel("Temps (s)")
-                    ax.set_ylabel("Probabilité")
-                    ax.legend()
-
-                    chart = st.pyplot(fig)
-
-                    scores = [[], [], []]  # 3 emotions for now
-
-                    # Sliding-window processing
-                    for i in range(0, len(audio), int(hop_length * sr)):
-                        chunk = audio[i:i + int(window_size * sr)]
-                        if len(chunk) < int(window_size * sr):
-                            break
-
-                        emotion_scores = predict_emotion(chunk, output_probs=True, sampling_rate=RATE)
-
-                        # Update the chart
-                        for emotion, line in zip(emotion_labels, lines):
-                            xdata = list(line.get_xdata())
-                            ydata = list(line.get_ydata())
-                            xdata.append(i / sr)
-                            ydata.append(emotion_scores[emotion])
-                            scores[list(emotion_scores).index(emotion)].append(emotion_scores[emotion])
-                            line.set_data(xdata, ydata)
-
-                        ax.relim()
-                        ax.autoscale_view()
-                        chart.pyplot(fig, use_container_width=True)
-
-                    # Prepare the styling
-                    st.markdown("""
-                    <style>
-                    .colored-box {
-                        padding: 10px;
-                        border-radius: 5px;
-                        color: white;
-                        font-weight: bold;
-                        text-align: center;
-                    }
-                    </style>
-                    """
-                    , unsafe_allow_html=True)
-
-                    # Dynamically create the specified number of columns
-                    columns = st.columns(len(emotion_scores))
-
-                    # emotion_scores_mean = [sum(sublist) / len(sublist) for sublist in scores]
-                    emotion_scores_mean = {emotion: sum(sublist) / len(sublist) for emotion, sublist in zip(emotion_labels, scores)}
-                    max_emo = max(emotion_scores_mean)
-                    emotion_scores_sorted = dict(sorted(emotion_scores_mean.items(), key=lambda x: x[1], reverse=True))
-                    colors_sorted = [colors[list(emotion_scores_mean.keys()).index(key)] for key in list(emotion_scores_sorted.keys())]
-
-                    # Add content to each column
-                    for i, (col, emotion) in enumerate(zip(columns, emotion_scores_sorted)):
-                        color = colors_sorted[i % len(colors_sorted)]  # Cycle through colors if more columns than colors
-                        col.markdown(f"""
-                        <div class="colored-box" style="background-color: {color};">
-                            {emotion} : {100*emotion_scores_sorted[emotion]:.2f} %
-                        </div>
-                        """
-                        , unsafe_allow_html=True)
-
-
-
-                    st.success("Analyse terminée !")
-                else:
-                    st.warning("You need to load an audio file !")
-
-        st.subheader("Feedback")
-
-        # Initialize the CSV file
-        csv_file = os.path.join("src", "predictions", "feedback.csv")
-
-        # Create the CSV file with the appropriate columns if it does not exist
-        if not os.path.exists(csv_file):
-            df = pd.DataFrame(columns=["filepath", "prediction", "feedback"])
-            df.to_csv(csv_file, index=False)
-
-        # Load the existing CSV data
-        df = pd.read_csv(csv_file)
-
-        with st.form("feedback_form"):
-            st.write("What should have been the correct prediction ? (*Choose the same emotion if the prediction was correct*).")
-            feedback = st.selectbox("Your answer :", ['Sadness', 'Anger', 'Disgust', 'Fear', 'Surprise', 'Joy', 'Neutral'])
-            submit_button = st.form_submit_button("Submit")
-            st.write("En cliquant sur ce bouton, vous acceptez que votre audio soit sauvegardé dans notre base de données.")
-
-            if submit_button:
-                # Append the feedback to the DataFrame
-                new_entry = {"filepath": audio_file.name, "prediction": max_emo, "feedback": feedback}
-                df = df.append(new_entry, ignore_index=True)
-
-                # Save the updated data back to the CSV file
-                df.to_csv(csv_file, index=False)
-
-                # Save the audio file
-                with open(os.path.join("src", "predictions", "data"), "wb") as f:
-                    f.write(audio_file.getbuffer())
-
-                # Confirm to the user
-                st.success("Merci pour votre retour ! Vos données ont été sauvegardées.")
-
-
-
-    with tab2:
-        st.header("🔈 Realtime Audio Record")
-        st.write("Here you can record an audio.")
-
-        if st.button("Register", key="register-button"):
-            st.success("Audio registered successfully.")
-
-        audio_file = st_audiorec()
-
-        if audio_file is not None:
-            st.audio(audio_file, format='audio/wav')
-
-    with tab3:
-        st.header("📝 Speech2Text Transcription")
-        st.write("Here you can get the audio transcript.")
-
-        save = st.checkbox("Save transcription to .txt", value=False, key="save-transcript")
-
-        ############################# Uncomment once this is debugged
-        if st.button("Transcribe", key="transcribe-button"):
-            # # Function to transcribe the audio
-            # transcription = transcribe_audio(st.audio)
-
-            # # Load and transcribe the audio
-            # # audio, rate = load_audio(audio_file_path)  # (re)load the audio if needed
-            # transcription = transcribe_audio(audio_file, sampling_rate=16000)
-
-            # # Display the transcription
-            # st.write("Transcription :", transcription)
-
-            st.success("Audio registered successfully.")
-            # if save:
-            #     file_path = "transcript.txt"
-
-            #     # Write the text to the file
-            #     with open(file_path, "w") as file:
-            #         file.write(transcription)
-
-            #     st.success(f"Text saved to {file_path}")
-
views/emotion_analysis.py ADDED
@@ -0,0 +1,150 @@
+import streamlit as st
+import pandas as pd
+import os
+import matplotlib.pyplot as plt
+import librosa
+from src.predict import predict_emotion
+
+DIRECTORY = "audios"
+FILE_NAME = "audio.wav"
+RATE = 16000
+
+def emotion_analysis():
+
+    st.header("❤️ Emotion Analysis")
+
+    if st.session_state.audio_file is None:
+        st.info("Please, upload or record an audio file in the studio tab")
+        st.stop()
+    else:
+        audio_file = st.session_state.audio_file
+
+    start_inference = st.button("Start emotion recogniton", "inf_on_upl_btn")
+    emotion_labels = ["colere", "neutre", "joie"]
+    colors = ['#f71c1c', '#cac8c8', '#f6d60a']
+
+    if start_inference:
+        # Streamlit setup
+        with st.spinner("Real-time emotion analysis..."):
+            # uploaded_file = st.file_uploader("Choisissez un fichier audio", type=["wav", "mp3"])
+
+            if audio_file is not None:
+                # Load and resample the audio
+                audio, sr = librosa.load(audio_file, sr=RATE)
+                # chunk = audio_file
+
+                # Sliding-window parameters
+                window_size = 1  # 1 second of data
+                hop_length = 0.5  # 0.5 seconds of overlap
+
+                # Create a live-updating chart
+                fig, ax = plt.subplots()
+                lines = [ax.plot([], [], label=emotion)[0] for emotion in emotion_labels]
+                ax.set_ylim(0, 1)
+                ax.set_xlim(0, len(audio) / sr)
+                ax.set_xlabel("Temps (s)")
+                ax.set_ylabel("Probabilité")
+
+                chart = st.pyplot(fig)
+
+                scores = [[], [], []]  # 3 emotions for now
+
+                # Sliding-window processing
+                for i in range(0, len(audio), int(hop_length * sr)):
+                    chunk = audio[i:i + int(window_size * sr)]
+                    if len(chunk) < int(window_size * sr):
+                        break
+
+                    emotion_scores = predict_emotion(chunk, output_probs=True, sampling_rate=RATE)
+
+                    # Update the chart
+                    for emotion, line in zip(emotion_labels, lines):
+                        xdata = list(line.get_xdata())
+                        ydata = list(line.get_ydata())
+                        colour = colors[list(emotion_scores).index(emotion)]
+                        xdata.append(i / sr)
+                        ydata.append(emotion_scores[emotion])
+                        scores[list(emotion_scores).index(emotion)].append(emotion_scores[emotion])
+                        line.set_data(xdata, ydata)
+                        line.set_color(colour)
+
+                    ax.relim()
+                    ax.autoscale_view()
+                    ax.legend()
+                    chart.pyplot(fig, use_container_width=True)
+
+                # Prepare the styling
+                st.markdown("""
+                <style>
+                .colored-box {
+                    padding: 10px;
+                    border-radius: 5px;
+                    color: white;
+                    font-weight: bold;
+                    text-align: center;
+                }
+                </style>
+                """
+                , unsafe_allow_html=True)
+
+                # Dynamically create the specified number of columns
+                columns = st.columns(len(emotion_scores))
+
+                # emotion_scores_mean = [sum(sublist) / len(sublist) for sublist in scores]
+                emotion_scores_mean = {emotion: sum(sublist) / len(sublist) for emotion, sublist in zip(emotion_labels, scores)}
+                max_emo = max(emotion_scores_mean)
+                emotion_scores_sorted = dict(sorted(emotion_scores_mean.items(), key=lambda x: x[1], reverse=True))
+                colors_sorted = [colors[list(emotion_scores_mean.keys()).index(key)] for key in list(emotion_scores_sorted.keys())]
+
+                # Add content to each column
+                for i, (col, emotion) in enumerate(zip(columns, emotion_scores_sorted)):
+                    color = colors_sorted[i % len(colors_sorted)]  # Cycle through colors if more columns than colors
+                    col.markdown(f"""
+                    <div class="colored-box" style="background-color: {color};">
+                        {emotion} : {100*emotion_scores_sorted[emotion]:.2f} %
+                    </div>
+                    """
+                    , unsafe_allow_html=True)
+
+
+
+                st.success("Analyse terminée !")
+            else:
+                st.warning("You need to load an audio file !")
+
+    if start_inference:
+
+        st.subheader("Feedback")
+
+        # Initialize the CSV file
+        csv_file = os.path.join("src", "predictions", "feedback.csv")
+
+        # Create the CSV file with the appropriate columns if it does not exist
+        if not os.path.exists(csv_file):
+            df = pd.DataFrame(columns=["filepath", "prediction", "feedback"])
+            df.to_csv(csv_file, index=False)
+
+        # Load the existing CSV data
+        df = pd.read_csv(csv_file)
+
+        with st.form("feedback_form"):
+            st.write("What should have been the correct prediction ? (*Choose the same emotion if the prediction was correct*).")
+            feedback = st.selectbox("Your answer :", ['Sadness', 'Anger', 'Disgust', 'Fear', 'Surprise', 'Joy', 'Neutral'])
+            submit_button = st.form_submit_button("Submit")
+            st.write("En cliquant sur ce bouton, vous acceptez que votre audio soit sauvegardé dans notre base de données.")
+
+            if submit_button:
+                # Append the feedback to the DataFrame
+                new_entry = pd.DataFrame([{"filepath": audio_file.name, "prediction": max_emo, "feedback": feedback}])
+                # df = df.append(new_entry, ignore_index=True)
+                df = pd.concat([df, new_entry], ignore_index=True)
+
+                # Save the updated data back to the CSV file
+                df.to_csv(csv_file, index=False)
+
+                # Save the audio file
+                with open(os.path.join("src", "predictions", "data", audio_file.name), "wb") as f:
+                    f.write(audio_file.getbuffer())
+
+                # Confirm to the user
+                st.success("Merci pour votre retour ! Vos données ont été sauvegardées.")
views/real_time.py CHANGED
@@ -10,12 +10,6 @@ from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
 import matplotlib.pyplot as plt
 import numpy as np
 import time
-from predict import predict_emotion
-
-# Load the Wav2Vec2 model for emotion classification
-model_name = "superb/wav2vec2-base-superb-er"  # Example model for emotion recognition
-processor = Wav2Vec2Processor.from_pretrained(model_name)
-model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
 
 # Audio parameters
 CHUNK = 1024
@@ -23,15 +17,6 @@ FORMAT = pyaudio.paInt16
 CHANNELS = 1
 RATE = 16000
 
-# Function to predict the emotion from an audio segment
-# def predict_emotion(audio_data):
-#     inputs = processor(audio_data, sampling_rate=RATE, return_tensors="pt", padding=True)
-#     with torch.no_grad():
-#         logits = model(**inputs).logits
-#     predicted_id = torch.argmax(logits, dim=-1).item()
-#     emotion = model.config.id2label[predicted_id]
-#     return emotion
-
 # Streamlit interface
 st.title("Détection des émotions en temps réel")
 
views/studio.py ADDED
@@ -0,0 +1,191 @@
+import streamlit as st
+from st_audiorec import st_audiorec
+
+# from src.model.transcriber import transcribe_audio
+
+
+def studio():
+    st.title("SISE ultimate challenge")
+    st.write("C'est le dernier challenge de la formation SISE.")
+    st.markdown("""
+    **Overview:**
+    - Analyse de logs
+    - Analyse de données
+    - Machine learning
+    """)
+
+    st.markdown("---")
+
+    st.header("🎧 Audio File Studio")
+
+    tab1, tab2, tab3 = st.tabs(["⬆️ Record Audio", "🔈 Realtime Audio", "📝 Transcription"])
+
+    with tab1:
+        st.header("⬆️ Upload Audio Record")
+        st.write("Here you can upload a pre-recorded audio.")
+        audio_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "ogg"])
+
+        if "audio_file" not in st.session_state:
+            st.session_state.audio_file = None
+
+        if audio_file is not None:
+            st.success("Audio file uploaded successfully !")
+            st.session_state.audio_file = audio_file
+
+            # with open(os.path.join(DIRECTORY, FILE_NAME), "wb") as f:
+            #     f.write(audio_file.getbuffer())
+            #     st.success(f"Saved file: {FILE_NAME}")
+
+
+
+    with tab2:
+        st.header("🔈 Realtime Audio Record")
+        st.write("Here you can record an audio.")
+
+        if "audio_file" not in st.session_state:
+            st.session_state.audio_file = None
+
+        audio_file = st_audiorec()
+
+        if audio_file is not None:
+            st.audio(audio_file, format='audio/wav')
+            st.success("Audio recorded successfully !")
+            st.session_state.audio_file = audio_file
+
+        # Buttons to start and stop recording
+        # start_button = st.button("Démarrer l'enregistrement")
+        # stop_button = st.button("Arrêter l'enregistrement")
+        # start_stop = st.button("Démarrer/Arrêter l'enregistrement")
+
+
+        # Real-time emotion display area
+        # emotion_placeholder = st.empty()
+        # final_emotion_placeholder = st.empty()
+        # audio = pyaudio.PyAudio()
+        # audio_buffer = np.array([])
+        # emotion_prediction = "Aucune prédiction"
+        # is_recording = False
+
+        # if start_stop:
+        #     is_recording = not is_recording
+
+        # # Global variables for sharing data between threads
+        # def audio_callback(in_data, frame_count, time_info, status):
+        #     global audio_buffer
+        #     audio_data = np.frombuffer(in_data, dtype=np.float32)
+        #     audio_buffer = np.concatenate((audio_buffer, audio_data))
+        #     return (in_data, pyaudio.paContinue)
+
+        # def predict_emotion_thread():
+        #     global audio_buffer, emotion_prediction
+        #     while is_recording:
+        #         if len(audio_buffer) >= CHUNK:
+        #             chunk = audio_buffer[:CHUNK]
+        #             audio_buffer = audio_buffer[STRIDE:]
+        #             emotion_prediction = predict_emotion(chunk, output_probs=False, sampling_rate=RATE)  # Use your model here
+        #             # time.sleep(0.1)
+
+        # if is_recording:
+        #     audio_buffer = np.array([])
+        #     stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True,
+        #                         frames_per_buffer=CHUNK, stream_callback=audio_callback)
+        #     stream.start_stream()
+        #     threading.Thread(target=predict_emotion_thread, daemon=True).start()
+        #     st.write("Enregistrement en cours...")
+        # else:
+        #     stream.stop_stream()
+        #     stream.close()
+        #     st.write("Enregistrement arrêté.")
+
+        # emotion_display = st.empty()
+
+        # while is_recording:
+        #     emotion_display.write(f"Émotion détectée : {emotion_prediction}")
+        #     # time.sleep(0.1)
+
+        # audio.terminate()
+
+
+
+        # stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
+
+        # frames = []
+        # real_time_emotions = []
+
+        # while not stop_button:
+        #     data = stream.read(CHUNK)
+        #     frames.append(data)
+
+        #     # Real-time processing (in 1-second slices)
+        #     if len(frames) >= RATE // CHUNK:
+        #         audio_segment = np.frombuffer(b''.join(frames[-(RATE // CHUNK):]), dtype=np.int16)
+        #         emotion = predict_emotion(audio_segment, output_probs=False, sampling_rate=RATE)
+        #         real_time_emotions.append(emotion)
+        #         emotion_placeholder.line_chart(real_time_emotions)  # Graphical display of the emotions
+
+        # # Stop recording
+        # stream.stop_stream()
+        # stream.close()
+        # audio.terminate()
+
+        # # Save the recorded audio
+        # wf = wave.open("output.wav", "wb")
+        # wf.setnchannels(CHANNELS)
+        # wf.setsampwidth(audio.get_sample_size(FORMAT))
+        # wf.setframerate(RATE)
+        # wf.writeframes(b"".join(frames))
+        # wf.close()
+
+        # # Final prediction on the full recorded audio
+        # full_audio_data = np.frombuffer(b''.join(frames), dtype=np.int16)
+        # final_emotion = predict_emotion(full_audio_data)
+
+        # final_emotion_placeholder.write(f"Émotion finale prédite : {final_emotion}")
+
+
+
+    with tab3:
+        st.header("📝 Speech2Text Transcription")
+        st.write("Here you can get the audio transcript.")
+
+        save = st.checkbox("Save transcription to .txt", value=False, key="save-transcript")
+
+        ############################# Uncomment once this is debugged
+        if st.button("Transcribe", key="transcribe-button"):
+            # # Function to transcribe the audio
+            # transcription = transcribe_audio(st.audio)
+
+            # # Load and transcribe the audio
+            # # audio, rate = load_audio(audio_file_path)  # (re)load the audio if needed
+            # transcription = transcribe_audio(audio_file, sampling_rate=16000)
+
+            # # Display the transcription
+            # st.write("Transcription :", transcription)
+
+            st.success("Audio registered successfully.")
+            # if save:
+            #     file_path = "transcript.txt"
+
+            #     # Write the text to the file
+            #     with open(file_path, "w") as file:
+            #         file.write(transcription)
+
+            #     st.success(f"Text saved to {file_path}")
+
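The two new views communicate through `st.session_state.audio_file`: studio() writes it on upload or recording, and emotion_analysis() reads it. Note that emotion_analysis() dereferences `st.session_state.audio_file` directly, which raises an error if the Studio page has never initialized the key; a defensive read is sketched below (hypothetical hardening, not in the commit):

    import streamlit as st

    # Consumer side (emotion_analysis): read the shared upload defensively.
    audio_file = st.session_state.get("audio_file")  # None if studio() never ran
    if audio_file is None:
        st.info("Please, upload or record an audio file in the studio tab")
        st.stop()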