File size: 4,884 Bytes
af1a473
bf756e1
af1a473
 
a83e487
 
 
2516100
a83e487
af1a473
112a3e2
 
15bf7ba
af1a473
a83e487
 
 
 
 
 
 
 
 
 
 
 
af1a473
 
a83e487
 
 
 
 
 
 
 
2516100
a83e487
 
 
af1a473
 
 
a83e487
 
 
2516100
af1a473
 
a83e487
 
 
 
 
 
af1a473
371ca9c
 
 
af1a473
371ca9c
 
15bf7ba
2516100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371ca9c
2516100
 
 
 
 
371ca9c
2516100
 
 
371ca9c
2516100
 
 
 
 
371ca9c
2516100
371ca9c
 
 
6cb8d02
2516100
 
af1a473
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os
import gradio as gr
import openai
import speech_recognition as sr
import logging

# Read the OpenAI credential from the environment (None when the variable is unset)
openai.api_key = os.getenv("OPENAI_API_KEY")

# Module-wide logging: INFO threshold via basicConfig, plus a logger named after this module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def generate_text():
    """Ask the chat model for two short sentences to practise reading aloud.

    Returns:
        The model's reply with surrounding whitespace stripped, or the fixed
        message "Error generating text. Please try again." when the request
        or the response handling raises.
    """
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Generate exactly two simple sentences for English pronunciation practice. Do not include any instructions, comments, or additional text."},
                {"role": "user", "content": "Create two simple sentences for pronunciation practice."}
            ]
        )
        return response.choices[0].message['content'].strip()
    except Exception as e:
        # Deliberate catch-all: this function is wired directly to a UI
        # button, so a failure is reported as text instead of propagating.
        # logger.exception keeps the traceback, which the previous
        # message-only logger.error call discarded.
        logger.exception("Error in generate_text: %s", e)
        return "Error generating text. Please try again."

def get_pronunciation_feedback(original_text, transcription):
    """Ask the chat model to compare the target text with what was transcribed.

    Args:
        original_text: The text the user was supposed to read.
        transcription: The text recognised from the user's recording.

    Returns:
        The model's feedback message, or the fixed message
        "Error generating feedback. Please try again." when the request or
        the response handling raises.
    """
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful pronunciation assistant. Compare the generated text with the user's transcription and provide feedback on how the user can improve their pronunciation. Single out specific words they pronounced incorrectly and give tips on how to improve, like for example 'schedule' can be pronounced as 'sked-jool'."},
                {"role": "user", "content": f"Original text: '{original_text}'\nTranscription: '{transcription}'\nProvide pronunciation feedback."}
            ]
        )
        return response.choices[0].message['content']
    except Exception as e:
        # Deliberate catch-all so the UI receives a message rather than an
        # exception; logger.exception records the traceback that the
        # previous message-only logger.error call discarded.
        logger.exception("Error in get_pronunciation_feedback: %s", e)
        return "Error generating feedback. Please try again."

def transcribe_audio_realtime(audio):
    """Transcribe a recorded audio file with the Google recogniser.

    Despite the name, nothing is streamed: the whole file is read and then
    sent for recognition in a single call.

    Args:
        audio: Audio source handed to ``sr.AudioFile``; the UI in this file
            supplies a file path.

    Returns:
        The recognised text, or one of three fixed messages on failure:
        "Could not understand audio", "Error in speech recognition service",
        or "Error transcribing audio. Please try again.".
    """
    try:
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        # Speech was unintelligible; an expected outcome, so nothing is logged.
        return "Could not understand audio"
    except sr.RequestError as e:
        logger.error("Could not request results from the speech recognition service; %s", e)
        return "Error in speech recognition service"
    except Exception as e:
        # Anything else (unreadable file, missing recording, ...) is
        # unexpected, so keep the traceback for diagnosis.
        logger.exception("Error in transcribe_audio_realtime: %s", e)
        return "Error transcribing audio. Please try again."

# Messages transcribe_audio_realtime returns in place of a transcription when
# recognition fails. They must never be forwarded to the feedback model as if
# they were the user's speech.
_TRANSCRIPTION_FAILURES = frozenset({
    "Could not understand audio",
    "Error in speech recognition service",
    "Error transcribing audio. Please try again.",
})


def practice_pronunciation(audio, text_to_read):
    """Run one practice round: transcribe the recording and request feedback.

    Args:
        audio: The user's recording as supplied by the audio input; falsy
            (``None`` or empty) when nothing was recorded.
        text_to_read: The text the user was asked to read. When empty or
            whitespace-only, a new text is generated first.

    Returns:
        A ``(text_to_read, transcription, feedback)`` tuple. When there is no
        recording, or transcription fails, ``feedback`` explains the problem
        and no feedback request is sent to the chat model.
    """
    if not text_to_read or not text_to_read.strip():
        text_to_read = generate_text()

    if not audio:
        # Nothing to transcribe; skip both remote calls.
        return (
            text_to_read,
            "",
            "No recording received. Please record your voice and submit again.",
        )

    transcription = transcribe_audio_realtime(audio)
    if transcription in _TRANSCRIPTION_FAILURES:
        # Comparing the target text against an error message would yield
        # meaningless feedback and waste an API call.
        return (
            text_to_read,
            transcription,
            "Feedback was not generated because the recording could not be transcribed. Please record again.",
        )

    feedback = get_pronunciation_feedback(text_to_read, transcription)
    return text_to_read, transcription, feedback

# Custom CSS for improved styling, written one rule per entry.
# The empty first and last entries reproduce the newline that opens and
# closes the stylesheet text.
custom_css = "\n".join((
    "",
    ".container {max-width: 800px; margin: auto; padding: 20px;}",
    ".title {text-align: center; color: #2c3e50; margin-bottom: 20px;}",
    ".subtitle {text-align: center; color: #34495e; margin-bottom: 30px;}",
    ".input-section, .output-section {background-color: #ecf0f1; padding: 20px; border-radius: 10px; margin-bottom: 20px;}",
    ".input-section h3, .output-section h3 {color: #2980b9; margin-bottom: 10px;}",
    ".button-primary {background-color: #3498db !important;}",
    ".button-secondary {background-color: #2ecc71 !important;}",
    "",
))

# Gradio interface with improved UI
#
# Fixes relative to the earlier layout:
#   * CSS classes are attached with `elem_classes`; `className` is not a
#     Gradio keyword, so the classes in custom_css were never applied.
#   * The page wrapper is a real layout container. An opening <div> in one
#     gr.HTML component and a closing </div> in another cannot wrap the
#     components between them, and the closing one was created outside the
#     Blocks context altogether.
#
# NOTE(review): gr.Box appears to be absent from newer Gradio releases, with
# gr.Group as its replacement; the fallback below keeps the script loadable
# on either. Confirm against the pinned Gradio version.
_Section = getattr(gr, "Box", None) or gr.Group

with gr.Blocks(css=custom_css) as demo:
    with gr.Column(elem_classes="container"):
        gr.HTML("<h1 class='title'>Pronunciation Practice Tool</h1>")
        gr.HTML("<p class='subtitle'>Improve your English pronunciation with AI-powered feedback</p>")

        with _Section(elem_classes="input-section"):
            gr.HTML("<h3>Step 1: Get Text to Read</h3>")
            with gr.Row():
                text_to_read = gr.Textbox(label="Text to Read", placeholder="Click 'Generate New Text' or type your own text here")
                generate_button = gr.Button("Generate New Text", variant="primary", elem_classes="button-primary")

        with _Section(elem_classes="input-section"):
            gr.HTML("<h3>Step 2: Record Your Voice</h3>")
            # NOTE(review): no source/sources argument is passed, so the input
            # mode is whatever the installed Gradio defaults to; confirm that
            # microphone recording is actually offered to the user.
            audio_input = gr.Audio(type="filepath", label="Record your voice reading the text above")

        with _Section(elem_classes="output-section"):
            gr.HTML("<h3>Step 3: Get Feedback</h3>")
            with gr.Row():
                transcription_output = gr.Textbox(label="Your Transcription", lines=3)
                feedback_output = gr.Textbox(label="Pronunciation Feedback", lines=5)

        submit_button = gr.Button("Submit for Feedback", variant="secondary", elem_classes="button-secondary")

    # Event wiring: generating fills the text box; submitting refreshes the
    # text box (it may have been auto-generated) and both output boxes.
    generate_button.click(generate_text, outputs=text_to_read)
    submit_button.click(practice_pronunciation, inputs=[audio_input, text_to_read], outputs=[text_to_read, transcription_output, feedback_output])

# Launch the app
# Runs only when this file is executed as a script; importing the module
# still builds `demo` above but does not start it.
if __name__ == "__main__":
    demo.launch()