audio_emotion_detector

Sleeping

App Files Files Community

audio_emotion_detector / app.py

kyawhtetpaingwin111

Update app.py

0f36c84 verified over 1 year ago

raw

history blame

2.84 kB

	# AUTOGENERATED! DO NOT EDIT!

	# %% auto 0
	# Public names of this module. Note that `description` and `md`, which are
	# also assigned at module level further down, are not listed here.
	__all__ = ['learn', 'categories', 'audio', 'label', 'inf', 'extract_emotion', 'get_y', 'classify_audio']

	from fastai.vision.all import *
	import gradio as gr
	import matplotlib.pyplot as plt
	import librosa
	import librosa.display
	from pathlib import Path
	import os

	def extract_emotion(file_name: str) -> str:
	    """Return the emotion label encoded in an audio file name.

	    The label is the text after the last underscore with the file
	    extension removed, e.g. ``"OAF_back_angry.wav"`` -> ``"angry"``.

	    Args:
	        file_name: Name of the audio file (without any directory part).

	    Returns:
	        The last underscore-separated field of the name, minus its
	        extension. A name without underscores is treated as a single
	        field; a name without an extension is returned unshortened.
	    """
	    # Only the final field carries the label, so split once from the right.
	    last_field = file_name.rsplit('_', 1)[-1]
	    # Strip the actual extension instead of a fixed four characters, so
	    # ".flac"/".mp3" names and extension-less names keep the full label.
	    label, _extension = os.path.splitext(last_field)
	    return label

	def get_y(filepath):
	    """Return the emotion label for the audio file located at *filepath*."""
	    # Keep only the text after the last "/" (the bare file name) and let
	    # extract_emotion pull the label out of it.
	    file_name = str(filepath).rsplit("/", 1)[-1]
	    return extract_emotion(file_name)

	# Load Learner
	# Runs at import time and reads "emotion_model.pkl" relative to the
	# current working directory.
	# NOTE(review): presumably the saved learner refers to `get_y`, which
	# would be why that function is defined before this call -- confirm.
	learn = load_learner("emotion_model.pkl")
	# Class vocabulary of the learner's DataLoaders; classify_audio pairs these
	# names with the predicted probabilities.
	categories = learn.dls.vocab

	def classify_audio(audio_file):
	    """Predict the emotion expressed in an audio recording.

	    The first 20 seconds of the audio are turned into a mel spectrogram,
	    the spectrogram is rendered to a PNG image, and that image is passed
	    to the image learner ``learn``.

	    Args:
	        audio_file: Path of the audio file to analyse.

	    Returns:
	        A dict mapping each entry of ``categories`` to its predicted
	        probability as a ``float``.
	    """
	    import tempfile

	    # Load the audio at its native sampling rate, capped at 20 seconds
	    sample, sample_rate = librosa.load(audio_file, sr=None, duration=20)
	    # Mel spectrogram in decibels relative to the loudest bin
	    S = librosa.feature.melspectrogram(y=sample, sr=sample_rate)
	    S_DB = librosa.power_to_db(S, ref=np.max)

	    # A private temporary directory avoids clashes between concurrent
	    # requests (a fixed file name in the working directory would be shared)
	    # and guarantees the image is deleted even if prediction fails.
	    with tempfile.TemporaryDirectory() as tmp_dir:
	        temp_img_path = Path(tmp_dir) / "temp_spectogram.png"

	        fig, ax = plt.subplots()
	        try:
	            fig.tight_layout(pad=0)
	            # Draw the spectrogram on this figure's own axes
	            librosa.display.specshow(S_DB, sr=sample_rate, x_axis='time',
	                                     y_axis='mel', ax=ax)
	            # Hide the axes so only the spectrogram itself is saved
	            ax.axis('off')
	            fig.savefig(temp_img_path)
	        finally:
	            # pyplot keeps every figure alive until it is closed explicitly;
	            # without this each request would leak one figure.
	            plt.close(fig)

	        # Only the per-class probabilities are needed from the prediction
	        _, _, probs = learn.predict(temp_img_path)

	    return dict(zip(categories, map(float, probs)))

	# Markdown-formatted help text shown to the user on the Gradio interface.
	description = """
	# Emotion Recognition from Audio

	Welcome to the app that recognizes emotion from the audio!

	## Instructions:
	- Upload or record audio (no more than 20 seconds for now)
	- Wait for processing and prediction from the model.

	## Emotions the app recognizes:
	1) Anger
	2) Disgust
	3) Fear
	4) Happiness
	5) Pleasant Surprise
	6) Sadness
	7) Neutral

	## About:
	This application is actually using a computer vision model (an adaptation of ResNet) for detection and the model
	has been trained on a relatively small dataset of 2,380 recordings from two actors saying phrases in different emotions.

	For more information, visit this [Github repo](https://github.com/KyawHtetWin/issem-machine-learning/tree/main/audio_emotion_detector)
	"""

	# Input component: the recording is handed to the callback as a file path.
	audio = gr.Audio(type="filepath", label="Upload Audio")
	# Output component: receives the {category: probability} dict.
	label = gr.Label()
	# Kept so the module-level name `md` continues to exist; it is no longer
	# passed to the interface (see below).
	md = gr.Markdown(description)
	# Gradio Interface
	# `description` is documented as a Markdown/HTML string, so pass the text
	# itself rather than a Markdown component object.
	inf = gr.Interface(
	    fn=classify_audio,
	    inputs=audio,
	    outputs=label,
	    title="Emotion Recognition",
	    description=description,
	)
	inf.launch(share=True)