File size: 1,961 Bytes
d8755a6
78fbf94
c9ca9ad
78fbf94
401d010
c9ca9ad
78fbf94
d8755a6
 
 
401d010
d8755a6
 
 
401d010
d8755a6
401d010
d8755a6
 
9a533bd
 
d8755a6
 
 
401d010
c9ca9ad
 
 
401d010
 
 
c9ca9ad
401d010
 
 
 
 
c9ca9ad
401d010
 
 
 
 
c9ca9ad
401d010
 
 
 
 
 
 
 
 
c9ca9ad
401d010
 
 
 
 
 
 
 
 
 
c9ca9ad
401d010
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from speechbrain.pretrained.interfaces import foreign_class
import gradio as gr
import os

import warnings
# Install a blanket "ignore" filter: with no category/module arguments this
# suppresses every Python warning for the rest of the process.
# NOTE(review): presumably meant to keep noisy library warnings out of the
# app logs -- confirm that hiding deprecation warnings as well is intended.
warnings.filterwarnings("ignore")

# Instantiate the pretrained emotion classifier once, at import time.
# foreign_class is given the model source identifier together with the module
# file and class name of the custom interface it should instantiate.
learner = foreign_class(
    classname="CustomEncoderWav2vec2Classifier",
    pymodule_file="custom_interface.py",
    source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
)

# Lookup table translating short emotion label codes into the
# human-readable emotion names returned to the user.
emotion_dict = dict(
    sad='Sad',
    hap='Happy',
    ang='Anger',
    fea='Fear',
    sur='Surprised',
    neu='Neutral',
)

def predict_emotion(audio):
    """Classify the emotion expressed in an audio clip.

    Args:
        audio: The value supplied for the audio input. May be an object that
            exposes the clip's path as ``.name`` (the form this function
            originally required), a plain path string, or ``None`` when no
            clip was supplied.

    Returns:
        The human-readable emotion name looked up in ``emotion_dict``. When
        the classifier yields a label that is not in that table, the raw
        label is returned unchanged. When ``audio`` is ``None``, a short
        prompt string is returned instead of a prediction.
    """
    if audio is None:
        # Without this guard an empty submission crashed with
        # AttributeError on ``audio.name``.
        return "No audio provided."

    # File-like objects carry the path in ``.name``; a bare string is
    # already the path.
    audio_path = getattr(audio, "name", audio)
    _out_prob, _score, _index, text_lab = learner.classify_file(audio_path)

    label = text_lab[0]
    # Fall back to the raw label rather than raising KeyError on a code
    # that has no entry in the lookup table.
    return emotion_dict.get(label, label)

def predict_emotion_from_file(file_path):
    """Classify the emotion of an audio file identified by its path.

    Args:
        file_path: Path of the audio file to classify. ``None`` or an empty
            string means nothing was selected.

    Returns:
        The human-readable emotion name looked up in ``emotion_dict``, or the
        raw classifier label when it has no entry there. When no file was
        selected, a short prompt string is returned instead.
    """
    if not file_path:
        return "No audio file selected."

    # Hand the path straight to the classifier. The previous version wrapped
    # it in a gr.Audio UI component (using a `file_path` keyword that the
    # component does not accept) and then read `.name` from it, which cannot
    # yield the file's path.
    _out_prob, _score, _index, text_lab = learner.classify_file(file_path)

    label = text_lab[0]
    # Fall back to the raw label rather than raising KeyError on a code
    # that has no entry in the lookup table.
    return emotion_dict.get(label, label)

# Header text displayed by the app.
title = "ML Speech Emotion Detection"
description = "Speechbrain powered wav2vec 2.0 pretrained model on IEMOCAP dataset using Gradio."

# Components of the upload tab: an audio input delivered to the prediction
# function as a file object, and a plain text output for the result.
outputs = "text"
inputs = gr.inputs.Audio(type="file", label="Input Audio")

# Define the second tab for selecting an audio file from the dropdown.
rec_folder = "rec"
if os.path.isdir(rec_folder):
    # os.listdir returns entries in arbitrary order, so sort for a stable
    # dropdown; compare extensions case-insensitively so files such as
    # "clip.WAV" are not silently skipped.
    audio_files = sorted(
        os.path.join(rec_folder, entry)
        for entry in os.listdir(rec_folder)
        if entry.lower().endswith(('.wav', '.mp3'))
    )
else:
    # A missing folder yields an empty dropdown instead of aborting the
    # whole app at import time.
    audio_files = []
file_dropdown = gr.inputs.Dropdown(label="Select Preloaded Audio File", choices=audio_files)

# Build one Interface per tab. gr.Interface accepts neither a `tab_name`
# argument nor exposes an `add_tab` method, so the two interfaces are combined
# with gr.TabbedInterface, which takes the interfaces and their tab labels.
upload_interface = gr.Interface(
    fn=predict_emotion,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
)

preloaded_interface = gr.Interface(
    fn=predict_emotion_from_file,
    inputs=file_dropdown,
    outputs=outputs,
)

# Keep the name `iface` for the top-level app object.
iface = gr.TabbedInterface(
    interface_list=[upload_interface, preloaded_interface],
    tab_names=["Upload Audio", "Select Preloaded Audio"],
)

# Launch the Gradio app
iface.launch()