Update app.py
app.py
CHANGED
@@ -4,7 +4,6 @@ import base64
 from PIL import Image
 import io
 import os
-import streamlit as st
 from helpers import text_to_speech, autoplay_audio, speech_to_text, get_api_key
 from generate_answer import base_model_chatbot, with_pdf_chatbot
 from audio_recorder_streamlit import audio_recorder
@@ -267,7 +266,7 @@ def create_interface():
 
     return demo
 
-#
+# Voice interaction (audio chat) setup for Gradio
 def voice_chat():
     # Float feature initialization
     float_init()
@@ -275,56 +274,57 @@ def voice_chat():
     # Prompt for API key
     api_key = get_api_key()
     if not api_key:
-        st.error("You must provide a valid OpenAI API Key to proceed.")
-        st.stop()
+        gr.error("You must provide a valid OpenAI API Key to proceed.")
+        return
 
     def initialize_session_state():
-        if "messages" not in st.session_state:
-            st.session_state.messages = [
+        if "messages" not in gr.session_state:
+            gr.session_state.messages = [
                 {"role": "assistant", "content": "Hi! How may I assist you today? (Please Speak Clearly)"}
             ]
 
     initialize_session_state()
 
-    st.title("OpenAI Conversational Chatbot (Voice Interaction) 🤖")
+    gr.title("OpenAI Conversational Chatbot (Voice Interaction) 🤖")
+
+    # Footer container for the microphone
+    footer_container = gr.container()
 
-    # Create footer container for the microphone
-    footer_container = st.container()
     with footer_container:
         audio_bytes = audio_recorder()
 
-    for message in st.session_state.messages:
-        with st.chat_message(message["role"]):
-            st.write(message["content"])
+    for message in gr.session_state.messages:
+        with gr.chat_message(message["role"]):
+            gr.write(message["content"])
 
     if audio_bytes:
         # Write the audio bytes to a file
-        with st.spinner("Transcribing..."):
+        with gr.spinner("Transcribing..."):
             webm_file_path = "temp_audio.mp3"
             with open(webm_file_path, "wb") as f:
                 f.write(audio_bytes)
 
             transcript = speech_to_text(webm_file_path)
             if transcript:
-                st.session_state.messages.append({"role": "user", "content": transcript})
-                with st.chat_message("user"):
-                    st.write(transcript)
+                gr.session_state.messages.append({"role": "user", "content": transcript})
+                with gr.chat_message("user"):
+                    gr.write(transcript)
                 os.remove(webm_file_path)
 
-    if st.session_state.messages[-1]["role"] != "assistant":
-        with st.chat_message("assistant"):
-            with st.spinner("Thinking🤔..."):
-                final_response = base_model_chatbot(st.session_state.messages)
+    if gr.session_state.messages[-1]["role"] != "assistant":
+        with gr.chat_message("assistant"):
+            with gr.spinner("Thinking🤔..."):
+                final_response = base_model_chatbot(gr.session_state.messages)
 
-            #
+            # Final check for punctuation and completeness
             if not final_response.strip()[-1] in ".!?":
                 final_response += " This is the end of the response. Let me know if you need anything else."
 
-            with st.spinner("Generating audio response..."):
+            with gr.spinner("Generating audio response..."):
                 audio_file = text_to_speech(final_response)
                 autoplay_audio(audio_file)
-                st.write(final_response)
-                st.session_state.messages.append({"role": "assistant", "content": final_response})
+                gr.write(final_response)
+                gr.session_state.messages.append({"role": "assistant", "content": final_response})
                 os.remove(audio_file)
 
     # Float the footer container and provide CSS to target it with
@@ -334,5 +334,5 @@ if __name__ == "__main__":
     demo = create_interface()  # Gradio multimodal chatbot
     demo.launch()
 
-    #
+    # Gradio voice chat
     voice_chat()
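A caution on the port itself: the new gr.* calls (gr.session_state, gr.chat_message, gr.spinner, gr.container, gr.title, gr.write, gr.error) mirror Streamlit's API one-for-one, but Gradio provides no such functions; its UIs are built declaratively from gr.Blocks and components. Below is a minimal sketch of the same voice loop in idiomatic Gradio, under these assumptions: Gradio 4+, and the repo helpers keeping the signatures visible in the diff (speech_to_text(path), base_model_chatbot(messages), text_to_speech(text) returning a playable file path). It is an illustration, not the commit's code.

    import gradio as gr

    from helpers import speech_to_text, text_to_speech
    from generate_answer import base_model_chatbot

    def respond(audio_path, history):
        # history is a list of {"role": ..., "content": ...} dicts (Chatbot type="messages")
        if audio_path is None:
            return history, None
        transcript = speech_to_text(audio_path)
        history = history + [{"role": "user", "content": transcript}]
        reply = base_model_chatbot(history)
        history = history + [{"role": "assistant", "content": reply}]
        return history, text_to_speech(reply)  # file path for the audio player

    with gr.Blocks() as voice_demo:
        chat = gr.Chatbot(
            type="messages",
            value=[{"role": "assistant",
                    "content": "Hi! How may I assist you today? (Please Speak Clearly)"}],
        )
        mic = gr.Audio(sources=["microphone"], type="filepath", label="Speak")
        reply_audio = gr.Audio(autoplay=True, label="Assistant")
        # change fires once a finished recording lands in the component
        mic.change(respond, inputs=[mic, chat], outputs=[chat, reply_audio])

    voice_demo.launch()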
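Separately, the retained completeness check indexes final_response.strip()[-1], which raises IndexError on an empty or whitespace-only reply. A safer standard-library equivalent (str.endswith accepts a tuple of suffixes and returns False on an empty string):

    # Defensive form of the punctuation check: no IndexError on empty replies
    if not final_response.strip().endswith((".", "!", "?")):
        final_response += " This is the end of the response. Let me know if you need anything else."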
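Finally, in the __main__ block, demo.launch() blocks the main thread by default, so voice_chat() only runs after the Gradio server shuts down. If both are meant to be live at once, Gradio's prevent_thread_lock flag makes the launch non-blocking; a sketch:

    demo = create_interface()  # Gradio multimodal chatbot
    demo.launch(prevent_thread_lock=True)  # returns immediately instead of blocking
    voice_chat()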