Spaces:

shukdevdatta123
/

Multi-modal-o1-Chatbot

Running

App Files Community

shukdevdatta123 committed on Mar 10

Commit

3324f5a

verified ·

1 Parent(s): 35d1afd

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -4

app.py CHANGED Viewed

@@ -77,12 +77,15 @@ def transcribe_audio(audio, openai_api_key):
         return f"Error transcribing audio: {str(e)}"
 # The function that will be used by Gradio interface
-def chatbot(input_text, image, audio, openai_api_key, reasoning_effort, model_choice, history=[]):
     # If there's audio, transcribe it to text
-    if audio:
         input_text = transcribe_audio(audio, openai_api_key)
-    response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
     # Append the response to the history
     history.append((f"User: {input_text}", f"Assistant: {response}"))
@@ -266,6 +269,13 @@ def create_interface():
         with gr.Row():
             openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)
         with gr.Row():
             image_input = gr.Image(label="Upload an Image", type="pil")  # Image upload input
             input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
@@ -288,7 +298,7 @@ def create_interface():
         chat_history = gr.Chatbot()
         # Button interactions
-        submit_btn.click(fn=chatbot, inputs=[input_text, image_input, audio_input, openai_api_key, reasoning_effort, model_choice, chat_history], outputs=[input_text, chat_history])
         clear_btn.click(fn=clear_history, inputs=[], outputs=[chat_history, chat_history])
     return demo

         return f"Error transcribing audio: {str(e)}"
 # The function that will be used by Gradio interface
+def chatbot(input_text, image, audio, openai_api_key, reasoning_effort, model_choice, input_mode, history=[]):
     # If there's audio, transcribe it to text
+    if audio and input_mode == "Voice":
         input_text = transcribe_audio(audio, openai_api_key)
+    if input_mode == "Text" or input_mode == "Voice":
+        response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
+    elif input_mode == "Image":
+        response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
     # Append the response to the history
     history.append((f"User: {input_text}", f"Assistant: {response}"))
         with gr.Row():
             openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)
+        with gr.Row():
+            input_mode = gr.Dropdown(
+                label="Select Input Mode",
+                choices=["Text", "Image", "Voice"],
+                value="Text"
+            )
         with gr.Row():
             image_input = gr.Image(label="Upload an Image", type="pil")  # Image upload input
             input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
         chat_history = gr.Chatbot()
         # Button interactions
+        submit_btn.click(fn=chatbot, inputs=[input_text, image_input, audio_input, openai_api_key, reasoning_effort, model_choice, input_mode, chat_history], outputs=[input_text, chat_history])
         clear_btn.click(fn=clear_history, inputs=[], outputs=[chat_history, chat_history])
     return demo