shukdevdatta123 committed on
Commit
14868b1
·
verified ·
1 Parent(s): 730c789

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -5
app.py CHANGED
@@ -3,6 +3,13 @@ import openai
3
  import base64
4
  from PIL import Image
5
  import io
 
 
 
 
 
 
 
6
 
7
  # Function to send the request to OpenAI API with an image or text input
8
  def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
@@ -132,7 +139,6 @@ custom_css = """
132
  #submit-btn:active {
133
  transform: scale(0.95);
134
  }
135
- /* Clear History Button: Light Red */
136
  #clear-history {
137
  background-color: #f04e4e; /* Slightly Darker red */
138
  color: white;
@@ -205,7 +211,7 @@ custom_css = """
205
  }
206
  """
207
 
208
- # Gradio interface setup
209
  def create_interface():
210
  with gr.Blocks(css=custom_css) as demo:
211
  gr.Markdown("""
@@ -261,7 +267,72 @@ def create_interface():
261
 
262
  return demo
263
 
264
- # Run the interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  if __name__ == "__main__":
266
- demo = create_interface()
267
- demo.launch()
 
 
 
 
3
  import base64
4
  from PIL import Image
5
  import io
6
+ import os
7
+ import streamlit as st
8
+ from helpers import text_to_speech, autoplay_audio, speech_to_text, get_api_key
9
+ from generate_answer import base_model_chatbot, with_pdf_chatbot
10
+ from audio_recorder_streamlit import audio_recorder
11
+ from streamlit_float import *
12
+ from PIL import Image as stImage
13
 
14
  # Function to send the request to OpenAI API with an image or text input
15
  def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
 
139
  #submit-btn:active {
140
  transform: scale(0.95);
141
  }
 
142
  #clear-history {
143
  background-color: #f04e4e; /* Slightly Darker red */
144
  color: white;
 
211
  }
212
  """
213
 
214
+ # Gradio interface setup for multimodal chatbot
215
  def create_interface():
216
  with gr.Blocks(css=custom_css) as demo:
217
  gr.Markdown("""
 
267
 
268
  return demo
269
 
270
# Streamlit voice chat app code
def voice_chat():
    """Streamlit voice-interaction chatbot.

    Records microphone audio from a floating footer, transcribes it,
    sends the transcript to the chat model, and plays back a synthesized
    audio reply. Stops the app if no OpenAI API key is provided.
    """
    # Float feature initialization (enables pinning the mic footer).
    float_init()

    # Prompt for API key; the app cannot proceed without one.
    api_key = get_api_key()
    if not api_key:
        st.error("You must provide a valid OpenAI API Key to proceed.")
        st.stop()

    def initialize_session_state():
        # Seed the conversation once per session with a greeting message.
        if "messages" not in st.session_state:
            st.session_state.messages = [
                {"role": "assistant", "content": "Hi! How may I assist you today? (Please Speak Clearly)"}
            ]

    initialize_session_state()

    st.title("OpenAI Conversational Chatbot (Voice Interaction) 🤖")

    # Create footer container for the microphone.
    footer_container = st.container()
    with footer_container:
        audio_bytes = audio_recorder()

    # Replay the conversation so far.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])

    if audio_bytes:
        # Persist the recording so the transcriber can read it from disk.
        # NOTE(review): original name said "webm" but the file is written as
        # .mp3 — confirm which container speech_to_text actually expects.
        with st.spinner("Transcribing..."):
            audio_file_path = "temp_audio.mp3"
            with open(audio_file_path, "wb") as f:
                f.write(audio_bytes)

            transcript = speech_to_text(audio_file_path)
            if transcript:
                st.session_state.messages.append({"role": "user", "content": transcript})
                with st.chat_message("user"):
                    st.write(transcript)
            os.remove(audio_file_path)

    if st.session_state.messages[-1]["role"] != "assistant":
        with st.chat_message("assistant"):
            with st.spinner("Thinking🤔..."):
                final_response = base_model_chatbot(st.session_state.messages)

            # Add a closing sentence when the reply looks truncated.
            # FIX: guard the empty/whitespace-only reply — the original
            # `final_response.strip()[-1]` raised IndexError on "".
            stripped = final_response.strip()
            if not stripped or stripped[-1] not in ".!?":
                final_response += " This is the end of the response. Let me know if you need anything else."

            with st.spinner("Generating audio response..."):
                audio_file = text_to_speech(final_response)
                autoplay_audio(audio_file)
            st.write(final_response)
            st.session_state.messages.append({"role": "assistant", "content": final_response})
            os.remove(audio_file)  # clean up the temporary TTS file

    # Float the footer container and provide CSS to target it with.
    footer_container.float("bottom: 0rem;")
332
+
333
if __name__ == "__main__":
    # FIX: in the original flow demo.launch() blocks until the Gradio server
    # shuts down, so voice_chat() was effectively unreachable — and a
    # Streamlit app must be started with `streamlit run`, not invoked after a
    # Gradio launch. Select exactly one UI per process via APP_MODE; the
    # default ("gradio") preserves the original reachable behavior.
    if os.environ.get("APP_MODE", "gradio").lower() == "streamlit":
        # Streamlit voice chat (run via: APP_MODE=streamlit streamlit run app.py)
        voice_chat()
    else:
        demo = create_interface()  # Gradio multimodal chatbot
        demo.launch()