Spaces:
Sleeping
Sleeping
# AUTOGENERATED! DO NOT EDIT! | |
# %% auto 0 | |
__all__ = ['learn', 'categories', 'audio', 'label', 'inf', 'extract_emotion', 'get_y', 'classify_audio'] | |
from fastai.vision.all import * | |
import gradio as gr | |
import matplotlib.pyplot as plt | |
import librosa | |
import librosa.display | |
from pathlib import Path | |
import os | |
def extract_emotion(file_name: str) -> str:
    """
    Given the name of the file, return the label
    indicating the emotion associated with the audio.

    The label is the text after the last underscore, with the file
    extension (if any) removed, e.g. ``"OAF_back_angry.wav"`` -> ``"angry"``.

    Args:
        file_name: Bare file name (no directory part).

    Returns:
        The emotion label encoded in the file name.
    """
    # Only the segment after the final underscore carries the label
    label_with_extension = file_name.rsplit('_', 1)[-1]
    # Strip the extension by locating the last dot instead of assuming a
    # three-letter suffix, so ".flac" or a missing extension also work
    label, dot, _ = label_with_extension.rpartition('.')
    return label if dot else label_with_extension
def get_y(filepath):
    """
    Return the emotion label for the file at ``filepath``.

    Only the final path component is inspected; it is handed to
    ``extract_emotion`` to pull the label out of the file name.
    """
    # NOTE(review): presumably kept at module level under this exact name
    # because the exported learner refers to it when unpickled - confirm.
    # os.path.basename honours the platform's separators instead of
    # assuming "/" (identical result on POSIX).
    return extract_emotion(os.path.basename(str(filepath)))
# Load Learner
# The pickle is looked up relative to the current working directory.
learn = load_learner("emotion_model.pkl")
# Class labels from the learner's DataLoaders; classify_audio pairs them
# positionally with the predicted probabilities.
categories = learn.dls.vocab
def classify_audio(audio_file):
    """
    Takes the audio file and returns its
    prediction of emotions along with probabilities.

    The audio (at most the first 20 seconds) is rendered as a mel
    spectrogram image, which is then classified by the image model
    in ``learn``.

    Args:
        audio_file: Path of the audio file to classify, as supplied by the
            Gradio audio input (``None`` when nothing was provided).

    Returns:
        A dict mapping each entry of ``categories`` to its probability
        as a float.

    Raises:
        gr.Error: If no audio was provided.
    """
    # Local import keeps this block self-contained
    import tempfile

    if audio_file is None:
        # Surface a readable message in the UI instead of a librosa traceback
        raise gr.Error("Please upload or record an audio clip first.")

    # Load the audio file at its native sample rate, capped at 20 seconds
    sample, sample_rate = librosa.load(audio_file, sr=None, duration=20)
    # Create spectrogram in decibels relative to the peak power
    mel = librosa.feature.melspectrogram(y=sample, sr=sample_rate)
    mel_db = librosa.power_to_db(mel, ref=np.max)

    # Prepare the figure for saving the spectrogram
    fig, ax = plt.subplots()
    temp_img_path = None
    try:
        fig.tight_layout(pad=0)
        # Create the spectrogram image
        librosa.display.specshow(mel_db, sr=sample_rate, x_axis='time',
                                 y_axis='mel', ax=ax)
        # Turn off the axis for saving; go through `ax`/`fig` rather than
        # pyplot's global "current figure", which concurrent requests share
        ax.axis('off')
        # Unique temp file per call so simultaneous requests cannot
        # overwrite or delete each other's image
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            temp_img_path = Path(tmp.name)
        fig.savefig(temp_img_path)
        _, _, probs = learn.predict(temp_img_path)
    finally:
        # Release the figure and the temp image even if prediction fails
        plt.close(fig)
        if temp_img_path is not None and temp_img_path.exists():
            os.remove(temp_img_path)
    return dict(zip(categories, map(float, probs)))
# Markdown text shown beneath the title of the interface
description = """
# Emotion Recognition from Audio
Welcome to the app that recognizes emotion from the audio!
## Instructions:
- Upload or record audio (no more than 20 seconds for now)
- Wait for processing and prediction from the model.
## Emotions the app recognizes:
1) Anger
2) Disgust
3) Fear
4) Happiness
5) Pleasant Surprise
6) Sadness
7) Neutral
## About:
This application is actually using a computer vision model (an adaptation of ResNet) for detection and the model
has been trained on a relatively small dataset of 2,380 recordings from two actors saying phrases in different emotions.
For more information, visit this [Github repo](https://github.com/KyawHtetWin/issem-machine-learning/tree/main/audio_emotion_detector)
"""
# Input: the audio is handed to classify_audio as a file path
audio = gr.Audio(type="filepath", label="Upload Audio")
# Output: label component fed with the {category: probability} dict
label = gr.Label()
# Gradio Interface
# `description` expects the Markdown text itself, not a gr.Markdown
# component, so the string is passed directly.
inf = gr.Interface(
    fn=classify_audio,
    inputs=audio,
    outputs=label,
    title="Emotion Recognition",
    description=description,
)
inf.launch(share=True)