# Hugging Face Space app (scraped page header "Spaces: Runtime error" removed)
| # AUTOGENERATED! DO NOT EDIT! | |
| # %% auto 0 | |
| __all__ = ['learn', 'categories', 'audio', 'label', 'inf', 'extract_emotion', 'get_y', 'classify_audio'] | |
| from fastai.vision.all import * | |
| import gradio as gr | |
| import matplotlib.pyplot as plt | |
| import librosa | |
| import librosa.display | |
| from pathlib import Path | |
| import os | |
def extract_emotion(file_name: str) -> str:
    """
    Return the emotion label encoded in an audio file name.

    File names follow the pattern ``<speaker>_<word>_<emotion>.<ext>``:
    the label is the last underscore-separated part, minus the extension.

    Parameters
    ----------
    file_name : str
        Bare file name (no directory components), e.g. ``"OAF_back_angry.wav"``.

    Returns
    -------
    str
        The emotion label, e.g. ``"angry"``.
    """
    # The label is the final underscore-separated token.
    label_with_extension = file_name.split('_')[-1]
    # splitext handles extensions of any length (the original sliced off a
    # hard-coded 4 characters, which breaks for e.g. ".flac" or ".jpeg").
    label, _ = os.path.splitext(label_with_extension)
    return label
| def get_y(filepath): return extract_emotion(str(filepath).split("/")[-1]) | |
# Load Learner
# Deserialize the exported fastai learner shipped alongside this app.
learn = load_learner("emotion_model.pkl")
# Ordered class names the model predicts; used to label the output probabilities.
categories = learn.dls.vocab
def classify_audio(audio_file):
    """
    Classify the emotion expressed in an audio recording.

    The audio is rendered as a mel-spectrogram image, which is then fed to
    the (vision) learner for prediction.

    Parameters
    ----------
    audio_file : str
        Path to the audio file (as supplied by the Gradio Audio component).

    Returns
    -------
    dict
        Mapping of each emotion category to its predicted probability.
    """
    # Load at the native sampling rate, capped at 20 seconds.
    sample, sample_rate = librosa.load(audio_file, sr=None, duration=20)
    # Mel spectrogram on a decibel scale (matches the training pipeline).
    S = librosa.feature.melspectrogram(y=sample, sr=sample_rate)
    S_DB = librosa.power_to_db(S, ref=np.max)
    # Render the spectrogram without axes so only the image content is saved.
    fig, ax = plt.subplots()
    fig.tight_layout(pad=0)
    librosa.display.specshow(S_DB, sr=sample_rate, x_axis='time',
                             y_axis='mel', ax=ax)
    ax.set_axis_off()
    # Save the spectrogram temporarily for the vision model to consume.
    temp_img_path = Path("temp_spectogram.png")
    try:
        fig.savefig(temp_img_path)
        pred, idx, probs = learn.predict(temp_img_path)
    finally:
        # Close the figure (the original leaked one figure per call) and
        # remove the temporary image even when prediction raises.
        plt.close(fig)
        if temp_img_path.exists():
            os.remove(temp_img_path)
    return dict(zip(categories, map(float, probs)))
# Markdown copy shown with the Gradio app: usage instructions and background.
description = """
# Emotion Recognition from Audio
Welcome to the app that recognizes emotion from the audio!
## Instructions:
- Upload or record audio (no more than 20 seconds for now)
- Wait for processing and prediction from the model.
## Emotions the app recognizes:
1) Anger
2) Disgust
3) Fear
4) Happiness
5) Pleasant Surprise
6) Sadness
7) Neutral
## About:
This application is actually using a computer vision model (an adaptation of ResNet) for detection and the model
has been trained on a relatively small dataset of 2,380 recordings from two actors saying phrases in different emotions.
For more information, visit this [Github repo](https://github.com/KyawHtetWin/issem-machine-learning/tree/main/audio_emotion_detector)
"""
# Gradio I/O components.
audio = gr.Audio(type="filepath", label="Upload Audio")
label = gr.Label()

# Gradio Interface. `description` expects a Markdown *string*; the original
# wrapped it in a gr.Markdown component, which gr.Interface cannot render
# and which likely caused the Space's runtime error.
inf = gr.Interface(fn=classify_audio, inputs=audio, outputs=label,
                   title="Emotion Recognition", description=description)
# share=True is ignored on HF Spaces; it creates a public link when run locally.
inf.launch(share=True)