File size: 1,067 Bytes
e34aefe
 
c4c15bc
e34aefe
2e9243a
 
 
e34aefe
 
c4c15bc
 
 
 
 
 
e34aefe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import gradio as gr
from transformers import pipeline
import numpy as np

# Load one audio-classification pipeline per scoring dimension at import time,
# so every request reuses the same three model instances. The generator is
# consumed in order by the tuple unpacking: accuracy, then fluency, then
# prosodic.
accuracy_classifier, fluency_classifier, prosodic_classifier = (
    pipeline(task="audio-classification", model=model_id)
    for model_id in (
        "JohnJumon/pronunciation_accuracy",
        "JohnJumon/fluency_accuracy",
        "JohnJumon/prosodic_accuracy",
    )
)

def pronunciation_scoring(audio):
    """Classify a recording for pronunciation accuracy, fluency and prosody.

    Args:
        audio: The value handed over by the ``gr.Audio`` input. With that
            component's default ``type="numpy"`` this is a
            ``(sample_rate, samples)`` tuple, or ``None`` when nothing was
            recorded. A bare array of samples is also accepted; in that case
            no sampling rate is forwarded and the pipelines treat the samples
            as already being at the rate the models expect.

    Returns:
        A dict with the keys ``'accuracy'``, ``'fluency'`` and ``'prosodic'``,
        each mapped to the label that the corresponding classifier scored
        highest. An empty dict is returned when there is no audio to score.
    """
    if audio is None:
        # Nothing was recorded; there is nothing to classify.
        return {}

    if isinstance(audio, tuple):
        sample_rate, samples = audio
    else:
        sample_rate, samples = None, audio

    y = np.asarray(samples, dtype=np.float32)
    if y.ndim > 1:
        # Down-mix (samples, channels) recordings: the pipelines expect a
        # single-channel waveform.
        y = y.mean(axis=1)
    if y.size == 0:
        return {}

    # Peak-normalise to [-1, 1]. A completely silent clip has peak 0, so skip
    # the division instead of filling the waveform with NaNs. Dividing
    # out-of-place also leaves the caller's array untouched.
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak

    classifiers = {
        'accuracy': accuracy_classifier,
        'fluency': fluency_classifier,
        'prosodic': prosodic_classifier,
    }
    result = {}
    for category, classifier in classifiers.items():
        if sample_rate is None:
            model_input = y
        else:
            # Forward the recording's sampling rate so the pipeline can
            # resample to the model's rate. A fresh dict is built per call
            # because the pipeline may consume (pop) its keys.
            model_input = {"sampling_rate": sample_rate, "raw": y}
        scores = classifier(model_input)
        # Keep only the label of the highest-scoring class.
        result[category] = max(scores, key=lambda x: x['score'])['label']
    return result

# Web UI: capture a clip from the microphone, pass it to
# pronunciation_scoring, and display the returned value in a label component.
gradio_app = gr.Interface(
    fn=pronunciation_scoring,
    title="Pronunciation Scoring",
    inputs=gr.Audio(sources=["microphone"]),
    outputs=gr.Label(label="Result"),
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    gradio_app.launch()