shukdevdatta123 committed on
Commit
35d1afd
·
verified ·
1 Parent(s): 13a5c1f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -20
app.py CHANGED
@@ -238,33 +238,60 @@ custom_css = """
238
  # Gradio interface setup
239
  def create_interface():
240
  with gr.Blocks(css=custom_css) as demo:
241
- gr.Markdown("""<div class="gradio-header"><h1>Multimodal Chatbot (Text + Image + Voice)</h1><h3>Interact with a chatbot using text, image, or voice inputs</h3></div>""")
242
-
 
 
 
 
 
 
243
  with gr.Accordion("Click to expand for details", open=False):
244
- gr.Markdown("""### Description: This is a multimodal chatbot that can handle text, image, and voice inputs. [Explanation Here]""")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
- with gr.TabItem("Text Chat"):
 
247
  input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
248
- submit_btn = gr.Button("Ask!")
249
- chat_history = gr.Chatbot()
250
- submit_btn.click(fn=chatbot, inputs=[input_text, None, None, gr.Textbox(), "medium", "o3-mini", chat_history], outputs=[input_text, chat_history])
251
 
252
- with gr.TabItem("Image Chat"):
253
- image_input = gr.Image(label="Upload an Image", type="pil")
254
- submit_btn = gr.Button("Ask!")
255
- chat_history = gr.Chatbot()
256
- submit_btn.click(fn=chatbot, inputs=[None, image_input, None, gr.Textbox(), "medium", "o1", chat_history], outputs=[input_text, chat_history])
 
 
 
 
 
 
 
 
257
 
258
- with gr.TabItem("Voice Chat"):
259
- audio_input = gr.Audio(label="Upload or Record Audio", type="filepath")
260
- submit_btn = gr.Button("Ask!")
261
- chat_history = gr.Chatbot()
262
- submit_btn.click(fn=chatbot, inputs=[None, None, audio_input, gr.Textbox(), "medium", "o3-mini", chat_history], outputs=[input_text, chat_history])
263
 
264
- # Interface list is a list of all tabs
265
- interface_list = [input_text, image_input, audio_input]
 
266
 
267
- return demo
268
 
269
  # Run the interface
270
  if __name__ == "__main__":
 
238
  # Gradio interface setup
239
  def create_interface():
240
  with gr.Blocks(css=custom_css) as demo:
241
+ gr.Markdown("""
242
+ <div class="gradio-header">
243
+ <h1>Multimodal Chatbot (Text + Image + Voice)</h1>
244
+ <h3>Interact with a chatbot using text, image, or voice inputs</h3>
245
+ </div>
246
+ """)
247
+
248
+ # Add a description with an expandable accordion
249
  with gr.Accordion("Click to expand for details", open=False):
250
+ gr.Markdown("""
251
+ ### Description:
252
+ This is a multimodal chatbot that can handle text, image, and voice inputs.
253
+ - You can ask questions or provide text, and the assistant will respond.
254
+ - You can also upload an image, and the assistant will process it and answer questions about the image.
255
+ - Voice input is supported: You can upload or record an audio file, and it will be transcribed to text and sent to the assistant.
256
+ - Enter your OpenAI API key to start interacting with the model.
257
+ - You can use the 'Clear History' button to remove the conversation history.
258
+ - "o1" is for image chat and "o3-mini" is for text chat.
259
+ ### Reasoning Effort:
260
+ The reasoning effort controls how complex or detailed the assistant's answers should be.
261
+ - **Low**: Provides quick, concise answers with minimal reasoning or details.
262
+ - **Medium**: Offers a balanced response with a reasonable level of detail and thought.
263
+ - **High**: Produces more detailed, analytical, or thoughtful responses, requiring deeper reasoning.
264
+ """)
265
+
266
+ with gr.Row():
267
+ openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)
268
 
269
+ with gr.Row():
270
+ image_input = gr.Image(label="Upload an Image", type="pil") # Image upload input
271
  input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
272
+ audio_input = gr.Audio(label="Upload or Record Audio", type="filepath") # Audio upload or record input (using filepath)
 
 
273
 
274
+ with gr.Row():
275
+ reasoning_effort = gr.Dropdown(
276
+ label="Reasoning Effort",
277
+ choices=["low", "medium", "high"],
278
+ value="medium"
279
+ )
280
+ model_choice = gr.Dropdown(
281
+ label="Select Model",
282
+ choices=["o1", "o3-mini"],
283
+ value="o1" # Default to 'o1' for image-related tasks
284
+ )
285
+ submit_btn = gr.Button("Ask!", elem_id="submit-btn")
286
+ clear_btn = gr.Button("Clear History", elem_id="clear-history")
287
 
288
+ chat_history = gr.Chatbot()
 
 
 
 
289
 
290
+ # Button interactions
291
+ submit_btn.click(fn=chatbot, inputs=[input_text, image_input, audio_input, openai_api_key, reasoning_effort, model_choice, chat_history], outputs=[input_text, chat_history])
292
+ clear_btn.click(fn=clear_history, inputs=[], outputs=[chat_history, chat_history])
293
 
294
+ return demo
295
 
296
  # Run the interface
297
  if __name__ == "__main__":