Update app.py
app.py
CHANGED
@@ -4,7 +4,6 @@ import base64
 from PIL import Image
 import io
 import os
-import streamlit as st
 from helpers import text_to_speech, autoplay_audio, speech_to_text, get_api_key
 from generate_answer import base_model_chatbot, with_pdf_chatbot
 from audio_recorder_streamlit import audio_recorder
@@ -267,7 +266,7 @@ def create_interface():
 
     return demo
 
-#
+# Voice interaction (audio chat) setup for Gradio
 def voice_chat():
     # Float feature initialization
     float_init()
@@ -275,56 +274,57 @@ def voice_chat():
     # Prompt for API key
     api_key = get_api_key()
     if not api_key:
-        st.error("You must provide a valid OpenAI API Key to proceed.")
-        st.stop()
+        gr.error("You must provide a valid OpenAI API Key to proceed.")
+        return
 
     def initialize_session_state():
-        if "messages" not in st.session_state:
-            st.session_state.messages = [
+        if "messages" not in gr.session_state:
+            gr.session_state.messages = [
                 {"role": "assistant", "content": "Hi! How may I assist you today? (Please Speak Clearly)"}
             ]
 
     initialize_session_state()
 
-    st.title("OpenAI Conversational Chatbot (Voice Interaction) 🤖")
+    gr.title("OpenAI Conversational Chatbot (Voice Interaction) 🤖")
+
+    # Footer container for the microphone
+    footer_container = gr.container()
 
-    # Create footer container for the microphone
-    footer_container = st.container()
     with footer_container:
         audio_bytes = audio_recorder()
 
-    for message in st.session_state.messages:
-        with st.chat_message(message["role"]):
-            st.write(message["content"])
+    for message in gr.session_state.messages:
+        with gr.chat_message(message["role"]):
+            gr.write(message["content"])
 
     if audio_bytes:
         # Write the audio bytes to a file
-        with st.spinner("Transcribing..."):
+        with gr.spinner("Transcribing..."):
             webm_file_path = "temp_audio.mp3"
             with open(webm_file_path, "wb") as f:
                 f.write(audio_bytes)
 
             transcript = speech_to_text(webm_file_path)
             if transcript:
-                st.session_state.messages.append({"role": "user", "content": transcript})
-                with st.chat_message("user"):
-                    st.write(transcript)
+                gr.session_state.messages.append({"role": "user", "content": transcript})
+                with gr.chat_message("user"):
+                    gr.write(transcript)
                 os.remove(webm_file_path)
 
-    if st.session_state.messages[-1]["role"] != "assistant":
-        with st.chat_message("assistant"):
-            with st.spinner("Thinking🤔..."):
-                final_response = base_model_chatbot(st.session_state.messages)
+    if gr.session_state.messages[-1]["role"] != "assistant":
+        with gr.chat_message("assistant"):
+            with gr.spinner("Thinking🤔..."):
+                final_response = base_model_chatbot(gr.session_state.messages)
 
-            #
+            # Final check for punctuation and completeness
             if not final_response.strip()[-1] in ".!?":
                 final_response += " This is the end of the response. Let me know if you need anything else."
 
-            with st.spinner("Generating audio response..."):
+            with gr.spinner("Generating audio response..."):
                 audio_file = text_to_speech(final_response)
                 autoplay_audio(audio_file)
-                st.write(final_response)
-                st.session_state.messages.append({"role": "assistant", "content": final_response})
+                gr.write(final_response)
+                gr.session_state.messages.append({"role": "assistant", "content": final_response})
                 os.remove(audio_file)
 
     # Float the footer container and provide CSS to target it with
@@ -334,5 +334,5 @@ if __name__ == "__main__":
     demo = create_interface()  # Gradio multimodal chatbot
     demo.launch()
 
-    #
+    # Gradio voice chat
     voice_chat()
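A caution on the port itself: the new gr.* calls (gr.session_state, gr.chat_message, gr.spinner, gr.container, gr.title, gr.write, gr.error) mirror Streamlit's API one-for-one, but Gradio provides no such functions; its UIs are built declaratively from gr.Blocks and components. Below is a minimal sketch of the same voice loop in idiomatic Gradio, under these assumptions: Gradio 4+, and the repo helpers keeping the signatures visible in the diff (speech_to_text(path), base_model_chatbot(messages), text_to_speech(text) returning a playable file path). It is an illustration, not the commit's code.

    import gradio as gr

    from helpers import speech_to_text, text_to_speech
    from generate_answer import base_model_chatbot

    def respond(audio_path, history):
        # history is a list of {"role": ..., "content": ...} dicts (Chatbot type="messages")
        if audio_path is None:
            return history, None
        transcript = speech_to_text(audio_path)
        history = history + [{"role": "user", "content": transcript}]
        reply = base_model_chatbot(history)
        history = history + [{"role": "assistant", "content": reply}]
        return history, text_to_speech(reply)  # file path for the audio player

    with gr.Blocks() as voice_demo:
        chat = gr.Chatbot(
            type="messages",
            value=[{"role": "assistant",
                    "content": "Hi! How may I assist you today? (Please Speak Clearly)"}],
        )
        mic = gr.Audio(sources=["microphone"], type="filepath", label="Speak")
        reply_audio = gr.Audio(autoplay=True, label="Assistant")
        # change fires once a finished recording lands in the component
        mic.change(respond, inputs=[mic, chat], outputs=[chat, reply_audio])

    voice_demo.launch()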
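Separately, the retained completeness check indexes final_response.strip()[-1], which raises IndexError on an empty or whitespace-only reply. A safer standard-library equivalent (str.endswith accepts a tuple of suffixes and returns False on an empty string):

    # Defensive form of the punctuation check: no IndexError on empty replies
    if not final_response.strip().endswith((".", "!", "?")):
        final_response += " This is the end of the response. Let me know if you need anything else."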
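Finally, in the __main__ block, demo.launch() blocks the main thread by default, so voice_chat() only runs after the Gradio server shuts down. If both are meant to be live at once, Gradio's prevent_thread_lock flag makes the launch non-blocking; a sketch:

    demo = create_interface()  # Gradio multimodal chatbot
    demo.launch(prevent_thread_lock=True)  # returns immediately instead of blocking
    voice_chat()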