File size: 1,434 Bytes
d8755a6
78fbf94
c9ca9ad
78fbf94
401d010
c9ca9ad
78fbf94
d8755a6
 
 
401d010
d8755a6
 
 
401d010
d8755a6
401d010
d8755a6
 
9a533bd
 
d8755a6
 
 
c9ca9ad
 
e7fc258
 
 
 
 
 
 
 
 
 
 
 
 
 
401d010
 
 
c9ca9ad
e7fc258
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from speechbrain.pretrained.interfaces import foreign_class
import gradio as gr
import os

import warnings
warnings.filterwarnings("ignore")

# Load the emotion-recognition model through SpeechBrain's `foreign_class`
# helper; the wrapper class comes from the model's own custom interface file.
learner = foreign_class(
    classname="CustomEncoderWav2vec2Classifier",
    pymodule_file="custom_interface.py",
    source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
)

# Maps the classifier's short label codes to the display names shown to users.
emotion_dict = dict(
    (
        ("sad", "Sad"),
        ("hap", "Happy"),
        ("ang", "Anger"),
        ("fea", "Fear"),
        ("sur", "Surprised"),
        ("neu", "Neutral"),
    )
)



# predict_emotion uses the module-level `learner` and `emotion_dict` defined above;
# `learner.classify_file` is the method that classifies an audio file given its path.

def predict_emotion(audio, rec_file):
    """Classify the emotion expressed in an audio file.

    ``rec_file`` takes precedence; ``audio`` is used only when ``rec_file``
    is missing, so an uploaded clip is no longer silently ignored.

    Args:
        audio: Audio input from the interface. Either an object exposing a
            ``.name`` path attribute or a plain path string.
            NOTE(review): presumably a Gradio temp-file wrapper -- confirm.
        rec_file: File input from the interface, in the same forms as
            ``audio``.

    Returns:
        The display name from ``emotion_dict`` for the predicted label, or
        the raw label itself when the mapping has no entry for it.

    Raises:
        ValueError: If neither ``rec_file`` nor ``audio`` was provided.
    """
    chosen = rec_file if rec_file is not None else audio
    if chosen is None:
        raise ValueError(
            "No audio provided: upload audio or choose a file to classify."
        )

    # Accept file-like objects (path in `.name`) as well as plain path strings.
    name = getattr(chosen, "name", chosen)
    # Relative names are looked up under "rec"; os.path.join returns an
    # absolute name unchanged, so absolute temp-file paths still resolve.
    rec_path = os.path.join("rec", name)

    # classify_file yields four values; only the text labels are needed here.
    _, _, _, text_lab = learner.classify_file(rec_path)
    label = text_lab[0]
    # Fall back to the raw label rather than raising KeyError on an unmapped one.
    return emotion_dict.get(label, label)

# Build and launch the Gradio interface.
inputs = [
    gr.inputs.Audio(label="Input Audio", type="file"),
    # The legacy gr.inputs.File constructor has no `default` parameter, so the
    # former `default="rec/"` keyword (a TypeError at import) is dropped.
    gr.inputs.File(label="Choose file from rec directory", type="file"),
]
outputs = "text"
title = "ML Speech Emotion Detection"
description = "Speechbrain powered wav2vec 2.0 pretrained model on IEMOCAP dataset using Gradio."

# Inputs are passed to predict_emotion positionally, in list order: (audio, rec_file).
gr.Interface(
    fn=predict_emotion,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
).launch()