import os

import azure.cognitiveservices.speech as speechsdk
import gradio as gr


def assess_pronunciation(audio_file, reference_text):
    """Score a recorded utterance against a reference text with Azure Speech.

    Credentials are read from the environment rather than embedded in source:

    * ``SPEECH_KEY`` -- Azure Speech subscription key (required).
    * ``SPEECH_REGION`` -- Azure region (optional, defaults to ``"westus"``).
    * ``SPEECH_RECOGNITION_LANGUAGE`` -- recognition locale such as ``"zh-CN"``
      (optional; when unset the SDK default is left untouched).

    Args:
        audio_file: Path to the audio file to assess, as supplied by the
            Gradio ``Audio`` input configured with ``type="filepath"``.
        reference_text: The text the speaker was expected to pronounce.

    Returns:
        On success, a dict with the keys ``"Accuracy"``, ``"Fluency"``,
        ``"Completeness"`` and ``"Prosody"`` (hundred-mark grading system).
        On any failure, a dict with a single ``"Error"`` key holding a
        human-readable message.
    """
    # Passing a missing path on to AudioConfig would raise, so report it here.
    if not audio_file:
        return {"Error": "No audio provided. Please record or upload an audio clip."}

    # Configure Azure Speech Service. The key must never be committed to
    # source control; it is supplied through the environment instead.
    speech_key = os.environ.get("SPEECH_KEY")
    service_region = os.environ.get("SPEECH_REGION", "westus")
    if not speech_key:
        return {
            "Error": "Azure Speech key is not configured. "
                     "Set the SPEECH_KEY environment variable."
        }

    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)

    # NOTE(review): the UI is titled "Chinese Pronunciation Checker" but the
    # original code never selected a recognition language. The language is
    # only overridden when explicitly configured, so existing behaviour is kept
    # by default -- confirm which locale is intended (and whether prosody
    # assessment is available for it).
    recognition_language = os.environ.get("SPEECH_RECOGNITION_LANGUAGE")
    if recognition_language:
        speech_config.speech_recognition_language = recognition_language

    # Set up the audio configuration
    audio_config = speechsdk.audio.AudioConfig(filename=audio_file)

    # Create pronunciation assessment config
    pronunciation_config = speechsdk.PronunciationAssessmentConfig(
        reference_text=reference_text or "",
        grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
        granularity=speechsdk.PronunciationAssessmentGranularity.Phoneme,
    )
    pronunciation_config.enable_prosody_assessment()

    # Create the recognizer and attach the assessment settings to it
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config,
        audio_config=audio_config,
    )
    pronunciation_config.apply_to(recognizer)

    # Recognize speech and assess pronunciation
    result = recognizer.recognize_once()

    # Debug information
    print(f"Recognition result reason: {result.reason}")

    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        pronunciation_result = speechsdk.PronunciationAssessmentResult(result)
        return {
            "Accuracy": pronunciation_result.accuracy_score,
            "Fluency": pronunciation_result.fluency_score,
            "Completeness": pronunciation_result.completeness_score,
            "Prosody": pronunciation_result.prosody_score,
        }

    if result.reason == speechsdk.ResultReason.NoMatch:
        print("NOMATCH: Speech could not be recognized.")
        return {"Error": "Speech could not be recognized. Please try again with a clearer audio."}

    if result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = speechsdk.CancellationDetails(result)
        print(f"CANCELED: Reason={cancellation_details.reason}")
        print(f"CANCELED: ErrorDetails={cancellation_details.error_details}")
        return {"Error": f"Speech recognition canceled: {cancellation_details.error_details}"}

    # Any other reason previously fell through and returned None, which left
    # the JSON output empty with no explanation.
    return {"Error": f"Unexpected recognition result: {result.reason}"}


# Create Gradio interface
interface = gr.Interface(
    fn=assess_pronunciation,
    inputs=[
        gr.Audio(type="filepath"),  # Audio input
        gr.Textbox(
            label="Reference Text",
            placeholder="Enter the reference text you are pronouncing",
        ),  # Reference text input
    ],
    outputs="json",
    title="Chinese Pronunciation Checker",
)

if __name__ == "__main__":
    interface.launch()