File size: 2,966 Bytes
af1a473
bf756e1
af1a473
 
 
112a3e2
 
15bf7ba
af1a473
112a3e2
af1a473
 
9da39f9
 
af1a473
 
9da39f9
af1a473
 
112a3e2
 
 
 
 
 
af1a473
112a3e2
af1a473
 
 
 
 
 
 
 
 
 
 
 
371ca9c
 
 
af1a473
371ca9c
 
15bf7ba
 
371ca9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6cb8d02
af1a473
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os
import gradio as gr
import openai
import speech_recognition as sr

# Configure the OpenAI client from the environment; the key is None when
# OPENAI_API_KEY is not set.
openai.api_key = os.getenv("OPENAI_API_KEY")

def generate_text():
    """Ask the chat model for two short practice sentences.

    Returns:
        The content of the first completion choice with surrounding
        whitespace stripped.
    """
    system_prompt = {
        "role": "system",
        "content": "Generate exactly two simple sentences for English pronunciation practice. Do not include any instructions, comments, or additional text.",
    }
    user_prompt = {
        "role": "user",
        "content": "Create two simple sentences for pronunciation practice.",
    }
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[system_prompt, user_prompt],
    )
    first_choice = completion.choices[0]
    return first_choice.message['content'].strip()

def get_pronunciation_feedback(original_text, transcription):
    """Ask the chat model to compare the target text with what was transcribed.

    Args:
        original_text: The text the user was asked to read.
        transcription: The text recognized from the user's recording.

    Returns:
        The content of the first completion choice, unmodified.
    """
    instructions = "You are a helpful pronunciation assistant. Compare the generated text with the user's transcription and provide feedback on how the user can improve their pronunciation. Single out specific words they pronounced incorrectly and give tips on how to improve, like for example 'schedule' can be pronounced as 'sked-jool'."
    request = f"Original text: '{original_text}'\nTranscription: '{transcription}'\nProvide pronunciation feedback."
    conversation = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": request},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    return completion.choices[0].message['content']

def transcribe_audio_realtime(audio):
    """Transcribe a recorded audio file using Google's speech recognition.

    Args:
        audio: Path to the audio file to transcribe. ``None`` or an empty
            value means nothing was recorded.

    Returns:
        The recognized text on success. On failure a human-readable message
        is returned instead of raising: when no audio was provided, when the
        file cannot be read, when the speech is unintelligible, or when the
        recognition service cannot be reached.
    """
    # Submitting without a recording yields no file path; opening that with
    # sr.AudioFile would raise instead of producing a usable message.
    if not audio:
        return "No audio was provided"

    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)
    except (ValueError, OSError):
        # AudioFile raises ValueError for formats it cannot decode and
        # OSError (e.g. FileNotFoundError) when the path is unreadable.
        return "Could not read the audio file"

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError:
        return "Could not request results from the speech recognition service"

def practice_pronunciation(audio, text_to_read):
    """Run one practice round: transcribe the recording and get feedback.

    Args:
        audio: Path to the user's recording, or ``None``/empty when nothing
            was recorded.
        text_to_read: The text the user was asked to read. When empty, a new
            text is generated before the comparison.

    Returns:
        A ``(text_to_read, transcription, feedback)`` tuple. When no audio
        was supplied the transcription is empty and the feedback slot holds
        a prompt to record first.
    """
    # Without a recording there is nothing to transcribe or compare, so bail
    # out before making any OpenAI or speech-recognition requests.
    if not audio:
        return (
            text_to_read,
            "",
            "No audio was recorded. Please record yourself reading the text, then submit again.",
        )

    # NOTE(review): if the text box was empty, the text generated here is one
    # the user has not seen while recording - confirm this is intended.
    if not text_to_read:
        text_to_read = generate_text()
    transcription = transcribe_audio_realtime(audio)
    feedback = get_pronunciation_feedback(text_to_read, transcription)
    return text_to_read, transcription, feedback

# Gradio interface: a text box with the sentences to read, an audio input,
# and two output boxes for the transcription and the model's feedback.
with gr.Blocks() as demo:
    gr.Markdown("# Pronunciation Practice Tool")
    gr.Markdown("Generate a text to read, then record yourself reading it. The system will provide pronunciation feedback.")
    
    # Target text and the button that (re)generates it share one row.
    with gr.Row():
        text_to_read = gr.Textbox(label="Text to Read")
        generate_button = gr.Button("Generate New Text")
    
    # type="filepath": the recording is passed on as a file path, which is
    # what practice_pronunciation forwards to transcribe_audio_realtime.
    audio_input = gr.Audio(type="filepath", label="Record your voice")
    
    # Results of a practice round, side by side.
    with gr.Row():
        transcription_output = gr.Textbox(label="Your Transcription")
        feedback_output = gr.Textbox(label="Pronunciation Feedback")
    
    submit_button = gr.Button("Submit")
    
    # "Generate New Text" writes the result of generate_text into the text box.
    generate_button.click(generate_text, outputs=text_to_read)
    # "Submit" runs a full practice round; text_to_read is also an output
    # because practice_pronunciation may generate a text when the box is empty.
    submit_button.click(practice_pronunciation, inputs=[audio_input, text_to_read], outputs=[text_to_read, transcription_output, feedback_output])

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()