# Audio-MNIST / app.py
# Uploaded by pksx01 in commit 6215757 ("Adding app.py"); file size: 710 Bytes.
from fastai.vision.all import *
import librosa
import gradio as gr
# Class labels, in the order they are paired with the model's output
# probabilities in classify(): the ten spoken digits "0" through "9".
categories = tuple(map(str, range(10)))

# Exported fastai learner, loaded once at import time and reused for every
# prediction request.
learn = load_learner('audio_mnist_classifier_v1.pkl')
def mel_spectrogram_tfm(file):
    """Load an audio file and return its mel spectrogram on a decibel scale.

    Args:
        file: Path (or other source accepted by ``librosa.load``) of the
            audio recording.

    Returns:
        Array of decibel values, scaled relative to the spectrogram's
        maximum (``ref=np.max``).
    """
    # librosa.load is called with its defaults, so the sampling rate it
    # returns is whatever librosa chose, and that same rate is passed on.
    y, sr = librosa.load(file)

    # The waveform must be passed as a keyword: librosa >= 0.10 made the
    # arguments of melspectrogram keyword-only, so a positional `y` raises
    # TypeError there. `y=y` also works on older releases.
    spec = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=512)

    # NOTE(review): melspectrogram defaults to a power spectrogram, for which
    # librosa.power_to_db is the usual conversion. amplitude_to_db is kept
    # because the model was presumably trained on this exact transform --
    # confirm against the training code before changing it.
    spec_db = librosa.amplitude_to_db(spec, ref=np.max)
    return spec_db
def classify(audio):
    """Predict which digit is spoken in an audio recording.

    Args:
        audio: Path of the recorded audio file, as passed to
            ``mel_spectrogram_tfm``.

    Returns:
        dict mapping each label in ``categories`` to the probability (as a
        Python float) that the model assigns to it.

    Raises:
        ValueError: If ``audio`` is None, i.e. nothing was recorded.
    """
    if audio is None:
        # Fail with a clear message instead of an opaque error from deep
        # inside the audio loader.
        raise ValueError("No audio was provided; record a clip before submitting.")

    spec_db = mel_spectrogram_tfm(audio)

    # The original called `PILImage.resize((225,225))` on the class itself,
    # which raises TypeError and never uses the spectrogram. Build an image
    # from the array first, then resize that image.
    # NOTE(review): this assumes the model was trained on images created
    # directly from the dB array at 225x225 -- confirm against the training
    # pipeline (channel count / value scaling).
    img = PILImage.create(spec_db).resize((225, 225))

    # predict() returns (decoded label, label index, probabilities); only the
    # per-class probabilities are needed for the label output.
    _, _, probs = learn.predict(img)
    return dict(zip(categories, map(float, probs)))
# Web UI: record audio from the microphone, hand its file path to classify(),
# and display the returned label -> probability mapping.
# `gr.outputs.Label` belongs to the deprecated legacy namespace; `gr.Label` is
# the equivalent component in the same API style as `gr.Audio` used here.
# NOTE(review): `source="microphone"` is the Gradio 3.x keyword; newer major
# versions reportedly use `sources=[...]` -- check the pinned Gradio version.
demo = gr.Interface(
    fn=classify,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=gr.Label(),
)
demo.launch()