File size: 1,067 Bytes
e34aefe c4c15bc e34aefe 2e9243a e34aefe c4c15bc e34aefe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
import gradio as gr
from transformers import pipeline
import numpy as np
# One audio-classification pipeline per scoring dimension, built once at
# import time so every request reuses the same pipeline objects.
# Order matters: it must line up with the three names unpacked below.
_SCORING_MODEL_IDS = (
    "JohnJumon/pronunciation_accuracy",
    "JohnJumon/fluency_accuracy",
    "JohnJumon/prosodic_accuracy",
)
accuracy_classifier, fluency_classifier, prosodic_classifier = [
    pipeline(task="audio-classification", model=model_id)
    for model_id in _SCORING_MODEL_IDS
]
def pronunciation_scoring(audio):
    """Classify a recording for pronunciation accuracy, fluency and prosody.

    Args:
        audio: Either a ``(sample_rate, samples)`` tuple -- the form
            ``gr.Audio`` hands to its callback with the default
            ``type="numpy"`` -- or a bare array of samples. A bare array
            carries no sample rate, so it is passed to the classifiers as-is.

    Returns:
        A dict with the keys ``'accuracy'``, ``'fluency'`` and ``'prosodic'``,
        each mapped to the label that the matching classifier scored highest.

    Raises:
        ValueError: If ``audio`` is ``None`` or contains no samples.
    """
    if audio is None:
        raise ValueError("No audio received; please record a sample first.")

    # Previously the tuple itself was treated as the sample array, which
    # failed on ``.astype``; unpack it and keep the rate for the pipelines.
    if isinstance(audio, tuple):
        sampling_rate, samples = audio
    else:
        sampling_rate, samples = None, audio

    # ``astype`` returns a copy, so the in-place scaling below never touches
    # the caller's buffer.
    samples = np.asarray(samples).astype(np.float32)
    if samples.size == 0:
        raise ValueError("The recording is empty; please record a sample first.")

    # Multi-channel recordings arrive as (samples, channels); average the
    # channels because the classifiers are fed a single 1-D signal.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Peak-normalise to [-1, 1]. A silent clip has a zero peak; skip the
    # division there instead of filling the signal with NaNs.
    peak = np.max(np.abs(samples))
    if peak > 0:
        samples /= peak

    def classifier_input():
        # Build a fresh payload per call in case a pipeline consumes the
        # dict's keys while preprocessing.
        if sampling_rate is None:
            return samples
        return {"sampling_rate": sampling_rate, "raw": samples}

    classifiers = {
        'accuracy': accuracy_classifier,
        'fluency': fluency_classifier,
        'prosodic': prosodic_classifier,
    }
    # Each classifier returns a list of {'label', 'score'} entries; keep only
    # the label of the top-scoring one.
    return {
        category: max(classifier(classifier_input()), key=lambda p: p['score'])['label']
        for category, classifier in classifiers.items()
    }
# Web UI: record from the microphone, run pronunciation_scoring on the
# recording and show what it returns in a Label component.
microphone_input = gr.Audio(sources=["microphone"])
result_label = gr.Label(label="Result")

gradio_app = gr.Interface(
    fn=pronunciation_scoring,
    inputs=microphone_input,
    outputs=result_label,
    title="Pronunciation Scoring",
)

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    gradio_app.launch()