aikitty committed on
Commit
a96c800
·
verified ·
1 Parent(s): b784e4c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -39
app.py CHANGED
@@ -3,46 +3,54 @@ from transformers import pipeline
3
  import numpy as np
4
  import os
5
 
6
- accuracy_classifier = pipeline(task="audio-classification", model="JohnJumon/pronunciation_accuracy")
7
- fluency_classifier = pipeline(task="audio-classification", model="JohnJumon/fluency_accuracy")
8
- prosodic_classifier = pipeline(task="audio-classification", model="JohnJumon/prosodic_accuracy")
9
-
10
- def pronunciation_scoring(audio):
11
- accuracy_description = {
12
- 'Extremely Poor': 'Extremely poor pronunciation and only one or two words are recognizable',
13
- 'Poor': 'Poor, clumsy and rigid pronunciation of the sentence as a whole, with serious pronunciation mistakes',
14
- 'Average': 'The overall pronunciation of the sentence is understandable, with many pronunciation mistakes and accent, but it does not affect the understanding of basic meanings',
15
- 'Good': 'The overall pronunciation of the sentence is good, with a few pronunciation mistakes',
16
- 'Excellent': 'The overall pronunciation of the sentence is excellent, with accurate phonology and no obvious pronunciation mistakes'
 
 
 
 
 
 
 
17
  }
18
- fluency_description = {
19
- 'Very Influent': 'Intermittent, very influent speech, with lots of pauses, repetition, and stammering',
20
- 'Influent': 'The speech is a little influent, with many pauses, repetition, and stammering',
21
- 'Average': 'Fluent in general, with a few pauses, repetition, and stammering',
22
- 'Fluent': 'Fluent without noticeable pauses or stammering'
 
23
  }
24
- prosodic_description = {
25
- 'Poor': 'Poor intonation and lots of stammering and pauses, unable to read a complete sentence',
26
- 'Unstable': 'Unstable speech speed, speak too fast or too slow, without the sense of rhythm',
27
- 'Stable': 'Unstable speech speed, many stammering and pauses with a poor sense of rhythm',
28
- 'Almost': 'Nearly correct intonation at a stable speaking speed, nearly smooth and coherent, but with little stammering and few pauses',
29
- 'Perfect': 'Correct intonation at a stable speaking speed, speak with cadence, and can speak like a native'
 
 
 
 
 
30
  }
31
- accuracy = accuracy_classifier(audio)
32
- fluency = fluency_classifier(audio)
33
- prosodic = prosodic_classifier(audio)
34
- result = {
35
- 'accuracy': accuracy,
36
- 'fluency': fluency,
37
- 'prosodic': prosodic
38
- }
39
- for category, scores in result.items():
40
- max_score_label = max(scores, key=lambda x: x['score'])['label']
41
- result[category] = max_score_label
42
- return result['accuracy'], accuracy_description[result['accuracy']], result['fluency'], fluency_description[result['fluency']], result['prosodic'], prosodic_description[result['prosodic']]
43
 
 
 
 
 
 
 
 
44
  gradio_app = gr.Interface(
45
- pronunciation_scoring,
46
  inputs=gr.Audio(sources="microphone", type="filepath"),
47
  outputs=[
48
  gr.Label(label="Accuracy Result"),
@@ -51,13 +59,13 @@ gradio_app = gr.Interface(
51
  gr.Textbox(interactive=False, show_label=False),
52
  gr.Label(label="Prosodic Result"),
53
  gr.Textbox(interactive=False, show_label=False)
54
- ],
55
- title="Pronunciation Scoring",
56
- description="This app will score your pronunciation accuracy, fluency, and prosodic (intonation)",
57
  examples=[
58
  [os.path.join(os.path.dirname(__file__),"audio.wav")],
59
  ]
60
  )
61
 
62
  if __name__ == "__main__":
63
- gradio_app.launch()
 
3
  import numpy as np
4
  import os
5
 
6
+ # Initialize classifiers with appropriate Chinese models
7
+ accuracy_classifier = pipeline(task="audio-classification", model="Chinese_pronunciation_accuracy")
8
+ fluency_classifier = pipeline(task="audio-classification", model="Chinese_fluency_accuracy")
9
+ prosodic_classifier = pipeline(task="audio-classification", model="Chinese_prosodic_accuracy")
10
+
11
+ def chinese_pronunciation_scoring(audio):
12
+ accuracy_description = {
13
+ 'Extremely Poor': 'Extremely poor pronunciation and only one or two words are recognizable',
14
+ 'Poor': 'Poor, clumsy and rigid pronunciation of the sentence as a whole, with serious pronunciation mistakes',
15
+ 'Average': 'The overall pronunciation of the sentence is understandable, with many pronunciation mistakes and accent, but it does not affect the understanding of basic meanings',
16
+ 'Good': 'The overall pronunciation of the sentence is good, with a few pronunciation mistakes',
17
+ 'Excellent': 'The overall pronunciation of the sentence is excellent, with accurate phonology and no obvious pronunciation mistakes'
18
+ }
19
+ fluency_description = {
20
+ 'Very Influent': 'Intermittent, very influent speech, with lots of pauses, repetition, and stammering',
21
+ 'Influent': 'The speech is a little influent, with many pauses, repetition, and stammering',
22
+ 'Average': 'Fluent in general, with a few pauses, repetition, and stammering',
23
+ 'Fluent': 'Fluent without noticeable pauses or stammering'
24
  }
25
+ prosodic_description = {
26
+ 'Poor': 'Poor intonation and lots of stammering and pauses, unable to read a complete sentence',
27
+ 'Unstable': 'Unstable speech speed, speak too fast or too slow, without the sense of rhythm',
28
+ 'Stable': 'Unstable speech speed, many stammering and pauses with a poor sense of rhythm',
29
+ 'Almost': 'Nearly correct intonation at a stable speaking speed, nearly smooth and coherent, but with little stammering and few pauses',
30
+ 'Perfect': 'Correct intonation at a stable speaking speed, speak with cadence, and can speak like a native'
31
  }
32
+
33
+ # Run classifiers on the input audio
34
+ accuracy = accuracy_classifier(audio)
35
+ fluency = fluency_classifier(audio)
36
+ prosodic = prosodic_classifier(audio)
37
+
38
+ # Process results
39
+ result = {
40
+ 'accuracy': accuracy,
41
+ 'fluency': fluency,
42
+ 'prosodic': prosodic
43
  }
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
+ for category, scores in result.items():
46
+ max_score_label = max(scores, key=lambda x: x['score'])['label']
47
+ result[category] = max_score_label
48
+
49
+ return result['accuracy'], accuracy_description[result['accuracy']], result['fluency'], fluency_description[result['fluency']], result['prosodic'], prosodic_description[result['prosodic']]
50
+
51
+ # Setting up the Gradio interface
52
  gradio_app = gr.Interface(
53
+ chinese_pronunciation_scoring,
54
  inputs=gr.Audio(sources="microphone", type="filepath"),
55
  outputs=[
56
  gr.Label(label="Accuracy Result"),
 
59
  gr.Textbox(interactive=False, show_label=False),
60
  gr.Label(label="Prosodic Result"),
61
  gr.Textbox(interactive=False, show_label=False)
62
+ ],
63
+ title="Chinese Pronunciation Scoring",
64
+ description="This app will score your Chinese pronunciation accuracy, fluency, and prosodic (intonation)",
65
  examples=[
66
  [os.path.join(os.path.dirname(__file__),"audio.wav")],
67
  ]
68
  )
69
 
70
  if __name__ == "__main__":
71
+ gradio_app.launch()