shukdevdatta123 committed on
Commit
1b21a19
·
verified ·
1 Parent(s): 133de89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -1,9 +1,8 @@
1
  import gradio as gr
2
  import openai
3
  import base64
4
- from PIL import Image
5
- import io
6
  import os
 
7
  from helpers import text_to_speech, autoplay_audio, speech_to_text, get_api_key
8
  from generate_answer import base_model_chatbot, with_pdf_chatbot
9
  from audio_recorder_streamlit import audio_recorder
@@ -24,11 +23,11 @@ def generate_response(input_text, image, openai_api_key, reasoning_effort="mediu
24
  input_text = f"data:image/png;base64,{image_info}"
25
 
26
  # Prepare the messages for OpenAI API
27
- if model_choice == "o1":
28
  messages = [
29
  {"role": "user", "content": [{"type": "image_url", "image_url": {"url": input_text}}]}
30
  ]
31
- elif model_choice == "o3-mini":
32
  messages = [
33
  {"role": "user", "content": [{"type": "text", "text": input_text}]}
34
  ]
@@ -56,7 +55,11 @@ def get_base64_string_from_image(pil_image):
56
  return base64_str
57
 
58
  # The function that will be used by Gradio interface
59
- def chatbot(input_text, image, openai_api_key, reasoning_effort, model_choice, history=[]):
 
 
 
 
60
  response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
61
 
62
  # Append the response to the history
@@ -267,7 +270,7 @@ def create_interface():
267
  chat_history = gr.Chatbot()
268
 
269
  # Button interactions
270
- submit_btn.click(fn=chatbot, inputs=[input_text, image_input, openai_api_key, reasoning_effort, model_choice, chat_history], outputs=[input_text, chat_history])
271
  clear_btn.click(fn=clear_history, inputs=[], outputs=[chat_history, chat_history])
272
 
273
  return demo
 
1
  import gradio as gr
2
  import openai
3
  import base64
 
 
4
  import os
5
+ import io
6
  from helpers import text_to_speech, autoplay_audio, speech_to_text, get_api_key
7
  from generate_answer import base_model_chatbot, with_pdf_chatbot
8
  from audio_recorder_streamlit import audio_recorder
 
23
  input_text = f"data:image/png;base64,{image_info}"
24
 
25
  # Prepare the messages for OpenAI API
26
+ if model_choice == "o1" and input_text:
27
  messages = [
28
  {"role": "user", "content": [{"type": "image_url", "image_url": {"url": input_text}}]}
29
  ]
30
+ elif model_choice == "o3-mini" and input_text:
31
  messages = [
32
  {"role": "user", "content": [{"type": "text", "text": input_text}]}
33
  ]
 
55
  return base64_str
56
 
57
  # The function that will be used by Gradio interface
58
+ def chatbot(input_text, image, voice_audio, openai_api_key, reasoning_effort, model_choice, history=[]):
59
+ # If voice_audio is provided, convert it to text
60
+ if voice_audio:
61
+ input_text = speech_to_text(voice_audio) # Convert speech to text
62
+
63
  response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
64
 
65
  # Append the response to the history
 
270
  chat_history = gr.Chatbot()
271
 
272
  # Button interactions
273
+ submit_btn.click(fn=chatbot, inputs=[input_text, image_input, voice_input, openai_api_key, reasoning_effort, model_choice, chat_history], outputs=[input_text, chat_history])
274
  clear_btn.click(fn=clear_history, inputs=[], outputs=[chat_history, chat_history])
275
 
276
  return demo