kyawhtetpaingwin111's picture
Update app.py
0f36c84 verified
raw
history blame
2.84 kB
# AUTOGENERATED! DO NOT EDIT!
# %% auto 0
# Public names exported when this module is star-imported.
__all__ = ['learn', 'categories', 'audio', 'label', 'inf', 'extract_emotion', 'get_y', 'classify_audio']
from fastai.vision.all import *
import gradio as gr
import matplotlib.pyplot as plt
import librosa
import librosa.display
from pathlib import Path
import os
def extract_emotion(file_name: str) -> str:
    """
    Return the emotion label encoded in an audio file name.

    The label is the text after the last underscore with the file
    extension removed, e.g. ``"OAF_back_angry.wav"`` -> ``"angry"``.

    Args:
        file_name: Name of the audio file.

    Returns:
        The label portion of the name, without its extension.
    """
    # The label is the final underscore-separated token of the name
    label_with_extension = file_name.split('_')[-1]
    # Cut at the last dot instead of assuming a 4-character extension
    # (".wav"), so ".flac" or extension-less names are handled correctly
    stem, dot, _extension = label_with_extension.rpartition('.')
    # No dot means there is no extension to strip
    return stem if dot else label_with_extension
def get_y(filepath):
    """
    Return the emotion label for the audio file at ``filepath``.

    Only the text after the last "/" (the bare file name) is passed on
    to ``extract_emotion``.
    """
    file_name = str(filepath).rpartition("/")[2]
    return extract_emotion(file_name)
# Load Learner
# NOTE(review): presumably `get_y` has to be defined before this call because the
# exported learner refers to it when it is unpickled — confirm against the training code.
learn = load_learner("emotion_model.pkl")
# Class labels taken from the learner's vocab; paired with the predicted
# probabilities in classify_audio.
categories = learn.dls.vocab
def classify_audio(audio_file):
    """
    Takes the audio file and returns its
    prediction of emotions along with probabilities.

    At most the first 20 seconds of the audio are rendered as a mel
    spectrogram image, which is then classified by the module-level
    ``learn`` model.

    Args:
        audio_file: Path to the audio file to classify.

    Returns:
        A dict mapping each entry of ``categories`` to its predicted
        probability as a float.

    Raises:
        gr.Error: If no audio file was provided.
    """
    import tempfile

    # Submitting the form without audio passes None; report it clearly
    # instead of failing inside librosa
    if audio_file is None:
        raise gr.Error("Please upload or record an audio clip first.")

    # Load the audio file at its native sampling rate (sr=None)
    sample, sample_rate = librosa.load(audio_file, sr=None, duration=20)
    # Create spectogram and convert power to decibels
    S = librosa.feature.melspectrogram(y=sample, sr=sample_rate)
    S_DB = librosa.power_to_db(S, ref=np.max)

    # Prepare the figure for saving the spectrogram
    fig, ax = plt.subplots()
    try:
        fig.tight_layout(pad=0)
        # Create the spectogram image
        librosa.display.specshow(S_DB, sr=sample_rate, x_axis='time',
                                 y_axis='mel', ax=ax)
        # Turn off the axis for saving; use the explicit axes/figure
        # rather than pyplot's implicit "current" state
        ax.axis('off')
        # Save the spectogram in a private temporary directory so that
        # concurrent requests cannot overwrite each other's image and
        # the file is removed even if prediction fails
        with tempfile.TemporaryDirectory() as tmp_dir:
            temp_img_path = Path(tmp_dir) / "temp_spectogram.png"
            fig.savefig(temp_img_path)
            _, _, probs = learn.predict(temp_img_path)
    finally:
        # pyplot keeps every figure alive until it is closed; without
        # this each request would leak one figure
        plt.close(fig)

    return dict(zip(categories, map(float, probs)))
# Markdown text describing the app; used as the description of the Gradio interface below.
description = """
# Emotion Recognition from Audio
Welcome to the app that recognizes emotion from the audio!
## Instructions:
- Upload or record audio (no more than 20 seconds for now)
- Wait for processing and prediction from the model.
## Emotions the app recognizes:
1) Anger
2) Disgust
3) Fear
4) Happiness
5) Pleasant Surprise
6) Sadness
7) Neutral
## About:
This application is actually using a computer vision model (an adaptation of ResNet) for detection and the model
has been trained on a relatively small dataset of 2,380 recordings from two actors saying phrases in different emotions.
For more information, visit this [Github repo](https://github.com/KyawHtetWin/issem-machine-learning/tree/main/audio_emotion_detector)
"""
# Input component: passes the uploaded/recorded clip to the function as a file path
audio = gr.Audio(type="filepath", label="Upload Audio")
# Output component: displays the label -> probability dict returned by classify_audio
label = gr.Label()
# Retained so existing references to `md` keep working; no longer passed to the Interface
md = gr.Markdown(description)
# Gradio Interface
# `description` expects Markdown/HTML text, so pass the string itself rather
# than a gr.Markdown component.
inf = gr.Interface(fn=classify_audio, inputs=audio, outputs=label, title="Emotion Recognition", description=description)
inf.launch(share=True)