Spaces:
Sleeping
Sleeping
File size: 2,456 Bytes
af1a473 bf756e1 af1a473 112a3e2 15bf7ba af1a473 112a3e2 af1a473 112a3e2 af1a473 112a3e2 af1a473 112a3e2 af1a473 15bf7ba af1a473 15bf7ba 112a3e2 6cb8d02 af1a473 6cb8d02 af1a473 6cb8d02 af1a473 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import os
import gradio as gr
import openai
import speech_recognition as sr
import time
# Configure the OpenAI client from the environment. The key is left as None
# when OPENAI_API_KEY is not set, so requests fail at call time, not here.
openai.api_key = os.getenv("OPENAI_API_KEY")
def generate_text():
    """Ask the chat model for a short practice paragraph.

    Sends a fixed system/user prompt pair to ``gpt-3.5-turbo`` and returns
    the content of the first completion choice.

    Returns:
        The ``content`` field of the first choice's message.
    """
    # NOTE(review): `openai.ChatCompletion` looks like the pre-1.0 client
    # interface -- confirm the pinned `openai` version before upgrading.
    prompt_messages = [
        {"role": "system", "content": "Generate a short paragraph (2-3 sentences) for an English learner to read aloud."},
        {"role": "user", "content": "Create a practice text."},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=prompt_messages,
    )
    first_choice = completion.choices[0]
    return first_choice.message['content']
def get_pronunciation_feedback(original_text, transcription):
    """Ask the chat model to compare the target text with what was transcribed.

    Args:
        original_text: The text the learner was supposed to read aloud.
        transcription: The text produced from the learner's recording.

    Returns:
        The ``content`` field of the first completion choice's message.
    """
    system_prompt = "You are a helpful pronunciation assistant. Compare the generated text with the user's transcription and provide feedback on how the user can improve their pronunciation. Single out specific words they pronounced incorrectly and give tips on how to improve, like for example 'schedule' can be pronounced as 'sked-jool'."
    user_prompt = f"Original text: '{original_text}'\nTranscription: '{transcription}'\nProvide pronunciation feedback."

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    first_choice = completion.choices[0]
    return first_choice.message['content']
def transcribe_audio_realtime(audio):
    """Transcribe an audio file using ``recognize_google``.

    Args:
        audio: Source passed straight to ``sr.AudioFile``.

    Returns:
        The recognized text, or a fixed human-readable message when the
        recognizer raises ``UnknownValueError`` or ``RequestError``.
    """
    engine = sr.Recognizer()

    # Read the whole clip into memory while the file is open.
    with sr.AudioFile(audio) as clip:
        captured = engine.record(clip)

    try:
        text = engine.recognize_google(captured)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError:
        return "Could not request results from the speech recognition service"
    return text
def practice_pronunciation(audio):
    """Run one practice round: generate a text, transcribe the clip, get feedback.

    Args:
        audio: Path to the recorded audio file. May be ``None`` or empty
            when no recording is available (presumably what a live Gradio
            interface passes when the audio input is cleared -- confirm).

    Returns:
        A ``(original_text, transcription, feedback)`` tuple of strings.
        When no audio is supplied, the first two items are empty and the
        third carries a short instruction for the user.
    """
    # Without a recording there is nothing to transcribe. Bail out before
    # spending OpenAI requests and before the audio file open can fail.
    if not audio:
        return (
            "",
            "",
            "No audio provided. Please record yourself reading the text aloud.",
        )

    # NOTE(review): the practice text is generated in the same call that
    # receives the recording, so it is only shown after the user has spoken.
    original_text = generate_text()
    transcription = transcribe_audio_realtime(audio)
    feedback = get_pronunciation_feedback(original_text, transcription)
    return original_text, transcription, feedback
# Gradio interface: one audio input (delivered as a file path) mapped to the
# three text outputs returned by practice_pronunciation, in the same order.
demo = gr.Interface(
    title="Pronunciation Practice Tool",
    description="Read the generated text aloud. The system will transcribe your speech and provide pronunciation feedback.",
    fn=practice_pronunciation,
    inputs=[gr.Audio(type="filepath")],
    outputs=[
        gr.Textbox(label="Text to Read"),
        gr.Textbox(label="Your Transcription"),
        gr.Textbox(label="Pronunciation Feedback"),
    ],
    live=True,
)
# Launch the app only when this file is executed as a script, so importing
# the module does not start the server.
if __name__ == "__main__":
    demo.launch()