pronunciation-scoring-chinese

Sleeping

File size: 3,648 Bytes

e34aefe
 
c4c15bc
b784e4c
e34aefe
a96c800
b827155
 
 
a96c800
 
 
 
 
 
 
 
 
 
 
 
 
 
b784e4c
a96c800
 
 
 
 
 
b784e4c
a96c800
 
 
 
 
 
 
 
 
 
 
b784e4c
e34aefe
a96c800
 
 
 
 
 
 
e34aefe
a96c800
b784e4c
 
 
 
 
 
 
 
a96c800
2fe25b6
a96c800
b784e4c
 
 
e34aefe
 
 
34aba6b

import gradio as gr
from transformers import pipeline
import numpy as np
import os

# Load one audio-classification pipeline per scoring dimension (accuracy,
# fluency, prosody). These are module-level statements, so the models are
# loaded when this file is imported.
# NOTE(review): all three checkpoints live under the "JohnJumon" namespace, and
# the app title further down describes JohnJumon's scorer as being for English.
# Confirm these checkpoints are actually suitable for Chinese speech.
accuracy_classifier = pipeline(task="audio-classification", model="JohnJumon/pronunciation_accuracy")
fluency_classifier = pipeline(task="audio-classification", model="JohnJumon/fluency_accuracy")
prosodic_classifier = pipeline(task="audio-classification", model="JohnJumon/prosodic_accuracy")

def _top_label(predictions):
    """Return the ``'label'`` of the entry with the highest ``'score'``."""
    return max(predictions, key=lambda entry: entry['score'])['label']


def chinese_pronunciation_scoring(audio):
    """Score a recording for pronunciation accuracy, fluency, and prosody.

    Each of the three module-level classifiers is run on ``audio``. For every
    dimension the highest-scoring label is kept and paired with a
    human-readable description of what that label means.

    Args:
        audio: The recording to score; it is passed unchanged to each
            classifier. The Gradio interface in this file is configured with
            ``type="filepath"``, and Gradio passes ``None`` when the user
            submits without recording anything.

    Returns:
        A 6-tuple ``(accuracy_label, accuracy_text, fluency_label,
        fluency_text, prosodic_label, prosodic_text)``, in the order expected
        by the interface's output components.

    Raises:
        gr.Error: If ``audio`` is ``None``, so the UI shows a readable message
            instead of an opaque pipeline failure.
    """
    if audio is None:
        raise gr.Error("No audio received. Please record a sentence before submitting.")

    # The dictionary keys must match the label strings emitted by the models.
    accuracy_description = {
        'Extremely Poor': 'Extremely poor pronunciation and only one or two words are recognizable',
        'Poor': 'Poor, clumsy and rigid pronunciation of the sentence as a whole, with serious pronunciation mistakes',
        'Average': 'The overall pronunciation of the sentence is understandable, with many pronunciation mistakes and accent, but it does not affect the understanding of basic meanings',
        'Good': 'The overall pronunciation of the sentence is good, with a few pronunciation mistakes',
        'Excellent': 'The overall pronunciation of the sentence is excellent, with accurate phonology and no obvious pronunciation mistakes'
    }
    fluency_description = {
        'Very Influent': 'Intermittent, very influent speech, with lots of pauses, repetition, and stammering',
        'Influent': 'The speech is a little influent, with many pauses, repetition, and stammering',
        'Average': 'Fluent in general, with a few pauses, repetition, and stammering',
        'Fluent': 'Fluent without noticeable pauses or stammering'
    }
    # NOTE(review): the 'Stable' entry describes *unstable* speech speed. The
    # text is kept as-is because the label names come from the model; confirm
    # whether the label or the description is the intended one.
    prosodic_description = {
        'Poor': 'Poor intonation and lots of stammering and pauses, unable to read a complete sentence',
        'Unstable': 'Unstable speech speed, speak too fast or too slow, without the sense of rhythm',
        'Stable': 'Unstable speech speed, many stammering and pauses with a poor sense of rhythm',
        'Almost': 'Nearly correct intonation at a stable speaking speed, nearly smooth and coherent, but with little stammering and few pauses',
        'Perfect': 'Correct intonation at a stable speaking speed, speak with cadence, and can speak like a native'
    }

    # Run the classifiers in a fixed order and keep only the winning label of
    # each; every classifier returns a list of {'label', 'score'} entries.
    accuracy_label = _top_label(accuracy_classifier(audio))
    fluency_label = _top_label(fluency_classifier(audio))
    prosodic_label = _top_label(prosodic_classifier(audio))

    def describe(label, descriptions):
        # A label missing from the table still gets shown, with an explicit
        # note, rather than failing the whole request with a KeyError.
        return descriptions.get(label, f"No description available for label '{label}'.")

    return (
        accuracy_label,
        describe(accuracy_label, accuracy_description),
        fluency_label,
        describe(fluency_label, fluency_description),
        prosodic_label,
        describe(prosodic_label, prosodic_description),
    )

# Build the web UI: one microphone input, and for each scoring dimension a
# label component followed by an unlabeled, read-only text box for the
# explanation. Components are created in the same order as they are displayed.
_microphone_input = gr.Audio(sources="microphone", type="filepath")
_result_outputs = [
    component
    for heading in ("Accuracy Result", "Fluency Result", "Prosodic Result")
    for component in (
        gr.Label(label=heading),
        gr.Textbox(interactive=False, show_label=False),
    )
]
# The bundled example clip is resolved relative to this file.
_example_clip = os.path.join(os.path.dirname(__file__), "audio.wav")

gradio_app = gr.Interface(
    fn=chinese_pronunciation_scoring,
    inputs=_microphone_input,
    outputs=_result_outputs,
    title="Trying to make a Chinese Pronunciation Scoring app like the one by JohnJumon for English",
    description="This app will score your Chinese pronunciation accuracy, fluency, and prosodic (intonation)",
    examples=[[_example_clip]],
)

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    gradio_app.launch()