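"""Gradio app for speech emotion recognition.

The app splits an uploaded recording into 3-second segments, extracts MFCC
features from each segment, classifies each segment with a pre-trained Keras
model, and plots how the predicted emotion changes over the speech.
"""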
import os

import gradio as gr
import librosa
import numpy as np
import pandas as pd
import plotly.express as px
import soundfile as sf
from tensorflow import keras
# Load the pre-trained emotion classifier (path as provided in the original
# Kaggle environment).
model = keras.models.load_model('/kaggle/input/emotions/emotion (1).h5')
# Class labels; the order must match the class order used during training.
labels = ['Angry', 'Disgusted', 'Fearful', 'Happy', 'Neutral', 'Sad', 'Surprised']

def predict(audio):
    """Split the recording into 3-second segments, predict an emotion for
    each segment, and return a Plotly figure of the emotion timeline."""
    # Load at the file's native sample rate.
    wave, sr = librosa.load(audio, sr=None)
    segment_dur_secs = 3
    segment_length = sr * segment_dur_secs
    # np.ceil keeps the trailing partial segment.
    num_sections = int(np.ceil(len(wave) / segment_length))
    split = [wave[i * segment_length:(i + 1) * segment_length]
             for i in range(num_sections)]

    # Write each segment to disk so it can be reloaded at a fixed sample
    # rate for feature extraction below.
    out_dir = 'audio_data/splits/'
    os.makedirs(out_dir, exist_ok=True)
    # os.path.splitext handles extensions of any length (the original
    # audio[:-4] assumed a three-character extension).
    recording_name = os.path.splitext(os.path.basename(audio))[0]
    paths = []
    for i in range(num_sections):
        out_path = os.path.join(out_dir, f"{recording_name}_{i}.wav")
        sf.write(out_path, split[i], sr)
        paths.append(out_path)
    # One feature vector per segment: the MFCC matrix (13 x n_frames)
    # averaged over the coefficient axis. The mean is used as the feature;
    # min, max, etc. could be added as well.
    features = []
    for path in paths:
        X, sample_rate = librosa.load(path,
                                      duration=2.5,
                                      sr=44100,
                                      offset=0.5)
        mfccs = np.mean(librosa.feature.mfcc(y=X,
                                             sr=sample_rate,
                                             n_mfcc=13),
                        axis=0)
        features.append(mfccs.tolist())
    predicted_features = pd.DataFrame(features)
    # A trailing segment shorter than the window yields fewer frames; its
    # row is padded with NaN and dropped here.
    predicted_features.dropna(inplace=True)
    preds = model.predict(predicted_features)
    preds = preds.argmax(axis=1)

    # Map class indices back to emotion names and plot the timeline.
    df_preds = pd.DataFrame(preds, columns=['prediction'])
    df_preds['emotion'] = [labels[int(i)] for i in df_preds['prediction']]
    df_preds = df_preds.reset_index()
    fig = px.line(df_preds, x="index", y="emotion",
                  title='Predicted emotion per 3-second segment',
                  labels={'index': 'samples (each a 3 s interval)'})
    return fig
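
# A minimal usage sketch (the .wav path below is hypothetical): predict()
# returns a Plotly figure, so it can also be used outside Gradio, e.g.
#   fig = predict('audio_data/sample.wav')
#   fig.show()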

outputs = gr.Plot()
title = "Emotion recognition"
description = "This model shows how the speaker's emotion changes over the course of the speech."

infr = gr.Interface(fn=predict,
                    inputs=gr.Audio(type="filepath"),
                    outputs=outputs,
                    title=title,
                    description=description,
                    interpretation='default')
infr.launch()
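
# Note: when running in a hosted notebook (e.g. Kaggle), Gradio can expose a
# temporary public URL instead of a local one via infr.launch(share=True).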