Update app.py
app.py
CHANGED
@@ -3,7 +3,7 @@ import os
 from gtts import gTTS
 import gradio as gr
 from groq import Groq
-from datetime import datetime
+from datetime import datetime

 # Load a smaller Whisper model for faster processing
 try:
@@ -88,20 +88,65 @@ def chatbot(audio):
         print(f"Error in chatbot function: {e}")
         return "Sorry, there was an error processing your request.", None, chat_history

+# CSS for custom background and styling
+custom_css = """
+<style>
+body {
+    background-color: #f4f4f4;
+    font-family: Arial, sans-serif;
+}
+.container {
+    background: linear-gradient(135deg, #84fab0 0%, #8fd3f4 100%);
+    border-radius: 12px;
+    padding: 20px;
+    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
+    max-width: 700px;
+    margin: auto;
+}
+.chat-history {
+    background-color: #ffffff;
+    border-radius: 10px;
+    padding: 15px;
+    height: 300px;
+    overflow-y: auto;
+    box-shadow: inset 0 0 5px rgba(0, 0, 0, 0.1);
+}
+.user-message {
+    color: #0084ff;
+    font-weight: bold;
+    margin-bottom: 5px;
+}
+.bot-message {
+    color: #5a5a5a;
+    margin-bottom: 10px;
+}
+.audio-output {
+    margin-top: 15px;
+    background-color: #d8f3dc;
+    border-radius: 8px;
+    padding: 10px;
+    text-align: center;
+}
+</style>
+"""
+
 # Gradio interface for real-time interaction with chat history display
 iface = gr.Interface(
     fn=chatbot,
     inputs=gr.Audio(type="filepath"),
     outputs=[
-        gr.
+        gr.HTML("<div class='chat-history'></div>"),  # Display chat history in a styled div
         gr.Audio(type="filepath", label="Response Audio"),
     ],
     live=True,
-    title="Audio Chatbot with Groq API",
+    title="Stylish Audio Chatbot with Groq API",
     description="Upload your audio, and the chatbot will transcribe and respond to it with a synthesized response.",
     theme="default"
 )

+# Inject the custom CSS into the interface
+iface.css = custom_css
+
 # Launch the Gradio app
 if __name__ == "__main__":
     iface.launch()
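A note on the styling change in this hunk: assigning iface.css after construction is not a documented update path in current Gradio releases; the supported route is the css= constructor argument, which also expects raw CSS rules rather than a <style>-wrapped block. Likewise, the string passed to gr.HTML(...) is only the component's initial value; whatever string chatbot() returns for that output replaces it on each call. A minimal sketch of the same styling wired through the constructor instead, with a placeholder handler that is not part of the commit:

import gradio as gr

# Raw CSS rules, without the <style> wrapper, for the css= constructor argument.
custom_css = """
.chat-history { background-color: #ffffff; border-radius: 10px; padding: 15px; }
"""

def echo(audio):
    # Placeholder handler: an HTML string for the first output, the audio path for the second.
    return "<div class='chat-history'>transcript goes here</div>", audio

iface = gr.Interface(
    fn=echo,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.HTML(), gr.Audio(type="filepath", label="Response Audio")],
    css=custom_css,  # CSS is passed at construction time
)

if __name__ == "__main__":
    iface.launch()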
@@ -114,17 +159,20 @@ if __name__ == "__main__":
|
|
114 |
|
115 |
|
116 |
|
117 |
-
|
|
|
|
|
|
|
118 |
# import whisper
|
119 |
# import os
|
120 |
# from gtts import gTTS
|
121 |
# import gradio as gr
|
122 |
# from groq import Groq
|
|
|
123 |
|
124 |
# # Load a smaller Whisper model for faster processing
|
125 |
# try:
|
126 |
-
#
|
127 |
-
# model = whisper.load_model("tiny") #, weights_only=True) # Use "tiny" for faster processing
|
128 |
# except Exception as e:
|
129 |
# print(f"Error loading Whisper model: {e}")
|
130 |
# model = None
|
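For reference, the Whisper pattern that both the live code and these commented copies rely on; a minimal sketch assuming the openai-whisper package is installed, with "sample.wav" standing in for the path Gradio supplies:

import whisper

# "tiny" trades accuracy for faster loading and inference, as the comment above notes.
model = whisper.load_model("tiny")

# transcribe() accepts a file path, which matches what gr.Audio(type="filepath") provides.
result = model.transcribe("sample.wav")
user_text = result.get("text", "")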
@@ -147,16 +195,6 @@ if __name__ == "__main__":
|
|
147 |
# try:
|
148 |
# chat_completion = client.chat.completions.create(
|
149 |
# messages=[{"role": "user", "content": user_input}],
|
150 |
-
# # messages=[
|
151 |
-
# # {
|
152 |
-
# # "role": "system",
|
153 |
-
# # "content": "You are a highly experienced Spoken English expert. Your task is to provide detailed feedback on the following transcription from an English learner. Focus on pronunciation, grammar, fluency, vocabulary usage, sentence structure, coherence, cohesion, and intonation. Provide feedback in these sections: Summary of Feedback, Detailed Mistake Identification, Suggestions for Improvement, and Encouragement."
|
154 |
-
# # },
|
155 |
-
# # {
|
156 |
-
# # "role": "user",
|
157 |
-
# # "content": f"Please provide feedback on the following spoken English: {user_input}"
|
158 |
-
# # }
|
159 |
-
# # ],
|
160 |
# model="llama3-8b-8192", # Replace with your desired model
|
161 |
# timeout=20 # Increased timeout to 20 seconds
|
162 |
# )
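The call being trimmed here follows the Groq SDK's chat-completions pattern; a self-contained sketch of that call, assuming GROQ_API is set in the environment as elsewhere in this file, with a placeholder prompt:

import os
from groq import Groq

client = Groq(api_key=os.getenv("GROQ_API"))

chat_completion = client.chat.completions.create(
    messages=[{"role": "user", "content": "Say hello in one sentence."}],  # placeholder prompt
    model="llama3-8b-8192",  # model name as used in the diff
    timeout=20,  # per-request timeout in seconds, matching the comment above
)
print(chat_completion.choices[0].message.content)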
@@ -191,139 +229,23 @@ if __name__ == "__main__":
 #         if not user_text.strip():
 #             return "Could not understand the audio. Please try speaking more clearly.", None, chat_history

+#         # Get current timestamp
+#         timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
 #         # Display transcription in chat history
-#         chat_history.append(("User", user_text))
+#         chat_history.append((timestamp, "User", user_text))

 #         # Step 2: Get LLM response from Groq
 #         response_text = get_llm_response(user_text)

 #         # Step 3: Convert the response text to speech
 #         output_audio = text_to_speech(response_text)
-
-#         # Append the latest interaction to the chat history
-#         chat_history.append(("Chatbot", response_text))
-
-#         # Format the chat history for display
-#         formatted_history = "\n".join([f"{speaker}: {text}" for speaker, text in chat_history])
-
-#         return formatted_history, output_audio, chat_history
-
-#     except Exception as e:
-#         print(f"Error in chatbot function: {e}")
-#         return "Sorry, there was an error processing your request.", None, chat_history
-
-# # Gradio interface for real-time interaction with chat history display
-# iface = gr.Interface(
-#     fn=chatbot,
-#     inputs=gr.Audio(type="filepath"),
-#     outputs=[
-#         gr.Textbox(label="Chat History"),  # Display chat history
-#         gr.Audio(type="filepath", label="Response Audio"),
-#     ],
-#     live=True,
-#     title="Audio Chatbot with Groq API",
-#     description="Upload your audio, and the chatbot will transcribe and respond to it with a synthesized response.",
-#     theme="default"
-# )
-
-# # Launch the Gradio app
-# if __name__ == "__main__":
-#     iface.launch()
-
-
-#-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
-
-
-
-
-
-# # Import libraries
-# import whisper
-# import os
-# from gtts import gTTS
-# import gradio as gr
-# from groq import Groq
-
-# # Load a smaller Whisper model for faster processing
-# try:
-#     model = whisper.load_model("base")  # Use "base" for faster processing
-# except Exception as e:
-#     print(f"Error loading Whisper model: {e}")
-#     model = None
-
-# # Set up Groq API client using environment variable
-# GROQ_API_TOKEN = os.getenv("GROQ_API")
-# if not GROQ_API_TOKEN:
-#     raise ValueError("Groq API token is missing. Set 'GROQ_API' in your environment variables.")
-# client = Groq(api_key=GROQ_API_TOKEN)
-
-# # Initialize the chat history
-# chat_history = []
-
-# # Function to get the LLM response from Groq with timeout handling
-# def get_llm_response(user_input, role="detailed responder"):
-#     prompt = f"As an expert, provide a detailed and knowledgeable response: {user_input}" if role == "expert" else \
-#              f"As a good assistant, provide a clear, concise, and helpful response: {user_input}" if role == "good assistant" else \
-#              f"Provide a thorough and detailed response: {user_input}"
-
-#     try:
-#         chat_completion = client.chat.completions.create(
-#             messages=[
-#                 {
-#                     "role": "system",
-#                     "content": "You are a highly experienced Spoken English expert. Your task is to provide detailed feedback on the following transcription from an English learner. Focus on pronunciation, grammar, fluency, vocabulary usage, sentence structure, coherence, cohesion, and intonation. Provide feedback in these sections: Summary of Feedback, Detailed Mistake Identification, Suggestions for Improvement, and Encouragement."
-#                 },
-#                 {
-#                     "role": "user",
-#                     "content": f"Please provide feedback on the following spoken English: {transcription}"
-#                 }
-#             ],
-#             model="llama3-8b-8192",  # Replace with your desired model
-#             timeout=10  # Set a 10-second timeout
-#         )
-#         return chat_completion.choices[0].message.content
-#     except Exception as e:
-#         print(f"Error during LLM response retrieval: {e}")
-#         return "Sorry, there was an error retrieving the response. Please try again."
-
-# # Function to convert text to speech using gTTS
-# def text_to_speech(text):
-#     try:
-#         tts = gTTS(text)
-#         output_audio = "output_audio.mp3"
-#         tts.save(output_audio)
-#         return output_audio
-#     except Exception as e:
-#         print(f"Error generating TTS: {e}")
-#         return None
-
-# # Main chatbot function to handle audio input and output with chat history
-# def chatbot(audio):
-#     if not model:
-#         return "Error: Whisper model is not available.", None, chat_history
-
-#     if not audio:
-#         return "No audio provided. Please upload a valid audio file.", None, chat_history
-
-#     try:
-#         # Step 1: Transcribe the audio using Whisper
-#         result = model.transcribe(audio)
-#         user_text = result.get("text", "")
-#         if not user_text.strip():
-#             return "Could not understand the audio. Please try speaking more clearly.", None, chat_history

-#         # Step 2: Get LLM response from Groq
-#         response_text = get_llm_response(user_text)
-
-#         # Step 3: Convert the response text to speech
-#         output_audio = text_to_speech(response_text)
-
 #         # Append the latest interaction to the chat history
-#         chat_history.append(("
-#         chat_history.append(("Chatbot", response_text))
+#         chat_history.append((timestamp, "Chatbot", response_text))

-#         # Format the chat history for display
-#         formatted_history = "\n".join([f"{speaker}: {text}" for speaker, text in chat_history])
+#         # Format the chat history for display with timestamps and clear labels
+#         formatted_history = "\n".join([f"[{time}] {speaker}: {text}" for time, speaker, text in chat_history])

 #         return formatted_history, output_audio, chat_history

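The timestamp additions in this hunk widen each history entry from a (speaker, text) pair to a (timestamp, speaker, text) triple; a standalone sketch of that bookkeeping, with illustrative sample strings:

from datetime import datetime

chat_history = []
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
chat_history.append((timestamp, "User", "Hello there"))            # sample entry
chat_history.append((timestamp, "Chatbot", "Hi! How can I help?"))  # sample entry

# Produces lines like "[2024-01-01 12:00:00] User: Hello there"
formatted_history = "\n".join(
    f"[{time}] {speaker}: {text}" for time, speaker, text in chat_history
)
print(formatted_history)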
@@ -334,7 +256,7 @@ if __name__ == "__main__":
|
|
334 |
# # Gradio interface for real-time interaction with chat history display
|
335 |
# iface = gr.Interface(
|
336 |
# fn=chatbot,
|
337 |
-
# inputs=gr.Audio(type="filepath"),
|
338 |
# outputs=[
|
339 |
# gr.Textbox(label="Chat History"), # Display chat history
|
340 |
# gr.Audio(type="filepath", label="Response Audio"),
|
@@ -349,100 +271,3 @@ if __name__ == "__main__":
|
|
349 |
# if __name__ == "__main__":
|
350 |
# iface.launch()
|
351 |
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
#-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
# # Import libraries
|
361 |
-
# import whisper
|
362 |
-
# import os
|
363 |
-
# from gtts import gTTS
|
364 |
-
# import gradio as gr
|
365 |
-
# from groq import Groq
|
366 |
-
|
367 |
-
# # Load a smaller Whisper model for faster processing
|
368 |
-
# try:
|
369 |
-
# model = whisper.load_model("base") # Use "base" for faster processing
|
370 |
-
# except Exception as e:
|
371 |
-
# print(f"Error loading Whisper model: {e}")
|
372 |
-
# model = None
|
373 |
-
|
374 |
-
# # Set up Groq API client using environment variable
|
375 |
-
# GROQ_API_TOKEN = os.getenv("GROQ_API")
|
376 |
-
# if not GROQ_API_TOKEN:
|
377 |
-
# raise ValueError("Groq API token is missing. Set 'GROQ_API' in your environment variables.")
|
378 |
-
# client = Groq(api_key=GROQ_API_TOKEN)
|
379 |
-
|
380 |
-
# # Function to get the LLM response from Groq with timeout handling
|
381 |
-
# def get_llm_response(user_input, role="detailed responder"):
|
382 |
-
# prompt = f"As an expert, provide a detailed and knowledgeable response: {user_input}" if role == "expert" else \
|
383 |
-
# f"As a good assistant, provide a clear, concise, and helpful response: {user_input}" if role == "good assistant" else \
|
384 |
-
# f"Provide a thorough and detailed response: {user_input}"
|
385 |
-
|
386 |
-
# try:
|
387 |
-
# chat_completion = client.chat.completions.create(
|
388 |
-
# messages=[{"role": "user", "content": prompt}],
|
389 |
-
# model="llama3-8b-8192", # Replace with your desired model
|
390 |
-
# timeout=10 # Set a 10-second timeout
|
391 |
-
# )
|
392 |
-
# return chat_completion.choices[0].message.content
|
393 |
-
# except Exception as e:
|
394 |
-
# print(f"Error during LLM response retrieval: {e}")
|
395 |
-
# return "Sorry, there was an error retrieving the response. Please try again."
|
396 |
-
|
397 |
-
# # Function to convert text to speech using gTTS
|
398 |
-
# def text_to_speech(text):
|
399 |
-
# try:
|
400 |
-
# tts = gTTS(text)
|
401 |
-
# output_audio = "output_audio.mp3"
|
402 |
-
# tts.save(output_audio)
|
403 |
-
# return output_audio
|
404 |
-
# except Exception as e:
|
405 |
-
# print(f"Error generating TTS: {e}")
|
406 |
-
# return None
|
407 |
-
|
408 |
-
# # Main chatbot function to handle audio input and output
|
409 |
-
# def chatbot(audio):
|
410 |
-
# if not model:
|
411 |
-
# return "Error: Whisper model is not available.", None
|
412 |
-
|
413 |
-
# if not audio:
|
414 |
-
# return "No audio provided. Please upload a valid audio file.", None
|
415 |
-
|
416 |
-
# try:
|
417 |
-
# # Step 1: Transcribe the audio using Whisper
|
418 |
-
# result = model.transcribe(audio)
|
419 |
-
# user_text = result.get("text", "")
|
420 |
-
# if not user_text.strip():
|
421 |
-
# return "Could not understand the audio. Please try speaking more clearly.", None
|
422 |
-
|
423 |
-
# # Step 2: Get LLM response from Groq
|
424 |
-
# response_text = get_llm_response(user_text)
|
425 |
-
|
426 |
-
# # Step 3: Convert the response text to speech
|
427 |
-
# output_audio = text_to_speech(response_text)
|
428 |
-
|
429 |
-
# return response_text, output_audio
|
430 |
-
|
431 |
-
# except Exception as e:
|
432 |
-
# print(f"Error in chatbot function: {e}")
|
433 |
-
# return "Sorry, there was an error processing your request.", None
|
434 |
-
|
435 |
-
# # Gradio interface for real-time interaction
|
436 |
-
# iface = gr.Interface(
|
437 |
-
# fn=chatbot,
|
438 |
-
# inputs=gr.Audio(type="filepath"), # Removed 'optional' argument
|
439 |
-
# outputs=[gr.Textbox(label="Chatbot Response"), gr.Audio(type="filepath", label="Response Audio")],
|
440 |
-
# live=True,
|
441 |
-
# title="Audio Chatbot with Groq API",
|
442 |
-
# description="Upload your audio, and the chatbot will transcribe and respond to it with a synthesized response.",
|
443 |
-
# theme="default"
|
444 |
-
# )
|
445 |
-
|
446 |
-
# # Launch the Gradio app
|
447 |
-
# if __name__ == "__main__":
|
448 |
-
# iface.launch()
|