from speechbrain.pretrained.interfaces import foreign_class
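# Note: this import path matches SpeechBrain < 1.0; on SpeechBrain >= 1.0 the
# same helper is expected under speechbrain.inference.interfaces instead.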
import gradio as gr
import os
import warnings
warnings.filterwarnings("ignore")

# Return the list of audio files in the 'rec/' directory
def get_audio_files_list(directory="rec"):
    try:
        return [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
    except FileNotFoundError:
        print("The 'rec' directory does not exist. Please make sure it is the correct path.")
        return []

# Loading the speechbrain emotion detection model
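# foreign_class fetches custom_interface.py from the Hugging Face model repo
# and instantiates the classifier class defined there.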
learner = foreign_class(
    source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
    pymodule_file="custom_interface.py", 
    classname="CustomEncoderWav2vec2Classifier"
)

# Map the model's short labels to human-readable emotion names
emotion_dict = {
    'sad': 'Sad',
    'hap': 'Happy',
    'ang': 'Anger',
    'fea': 'Fear',
    'sur': 'Surprised',
    'neu': 'Neutral'
}

# Prediction function for Gradio: classify the selected recording
def predict_emotion(selected_audio):
    if not selected_audio:
        return "Please select an audio file first."
    file_path = os.path.join("rec", selected_audio)
    out_prob, score, index, text_lab = learner.classify_file(file_path)
    return emotion_dict.get(text_lab[0], text_lab[0])

# Get the list of audio files for the dropdown
audio_files_list = get_audio_files_list()

# Function to return the selected audio file path
def get_audio_file_path(selected_audio):
    file_path = os.path.join("rec", selected_audio)
    return file_path

# Build the Gradio app with Blocks so the dropdown can both update the audio
# player and feed the emotion classifier.
with gr.Blocks(title="ML Speech Emotion Detection") as interface:
    gr.Markdown(
        "## ML Speech Emotion Detection\n"
        "SpeechBrain-powered wav2vec 2.0 model pretrained on the IEMOCAP dataset, served with Gradio."
    )
    dropdown = gr.Dropdown(label="Select Audio", choices=audio_files_list)
    audio_player = gr.Audio(label="Listen to the selected audio")
    predict_button = gr.Button("Detect Emotion")
    prediction = gr.Textbox(label="Predicted Emotion")

    # Update the audio player when a new selection is made from the dropdown
    dropdown.change(fn=get_audio_file_path, inputs=dropdown, outputs=audio_player)

    # Classify the selected recording when the button is clicked
    predict_button.click(fn=predict_emotion, inputs=dropdown, outputs=prediction)

interface.launch()