shukdevdatta123 committed on
Commit
bd42163
·
verified ·
1 Parent(s): 90d409d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -39
app.py CHANGED
@@ -77,20 +77,15 @@ def transcribe_audio(audio, openai_api_key):
77
  return f"Error transcribing audio: {str(e)}"
78
 
79
  # The function that will be used by Gradio interface
80
- def chatbot(input_text, image, audio, openai_api_key, reasoning_effort, model_choice, input_mode, history=[]):
81
  # If there's audio, transcribe it to text
82
- if audio and input_mode == "Voice":
83
  input_text = transcribe_audio(audio, openai_api_key)
84
 
85
- if input_mode == "Image" and image:
86
- # If Image Mode is selected and image is uploaded
87
- input_text = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
88
- elif input_mode == "Text" and input_text:
89
- # If Text Mode is selected
90
- input_text = generate_response(input_text, None, openai_api_key, reasoning_effort, model_choice)
91
-
92
  # Append the response to the history
93
- history.append((f"User: {input_text}", f"Assistant: {input_text}"))
94
 
95
  return "", history
96
 
@@ -243,25 +238,38 @@ custom_css = """
243
  # Gradio interface setup
244
  def create_interface():
245
  with gr.Blocks(css=custom_css) as demo:
246
- gr.Markdown("""<div class="gradio-header">
247
- <h1>Multimodal Chatbot (Text + Image + Voice)</h1>
248
- <h3>Interact with a chatbot using text, image, or voice inputs</h3>
249
- </div>""")
 
 
250
 
251
- # Choose input type (Text, Image, Voice)
252
- input_mode = gr.Radio(
253
- label="Choose Input Mode",
254
- choices=["Text", "Image", "Voice"],
255
- value="Text"
256
- )
 
 
 
 
 
 
 
 
 
 
 
257
 
258
  with gr.Row():
259
  openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)
260
 
261
- # Text, Image, and Audio Inputs will be displayed based on the chosen mode
262
- input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
263
- image_input = gr.Image(label="Upload an Image", type="pil")
264
- audio_input = gr.Audio(label="Upload or Record Audio", type="filepath")
265
 
266
  with gr.Row():
267
  reasoning_effort = gr.Dropdown(
@@ -272,32 +280,20 @@ def create_interface():
272
  model_choice = gr.Dropdown(
273
  label="Select Model",
274
  choices=["o1", "o3-mini"],
275
- value="o1"
276
  )
277
  submit_btn = gr.Button("Ask!", elem_id="submit-btn")
278
  clear_btn = gr.Button("Clear History", elem_id="clear-history")
279
 
280
  chat_history = gr.Chatbot()
281
 
282
- # Dynamically control the input visibility based on the selected mode
283
- def toggle_inputs(input_mode):
284
- if input_mode == "Text":
285
- return input_text, None, None
286
- elif input_mode == "Image":
287
- return input_text, image_input, None
288
- else: # Voice
289
- return None, None, audio_input
290
-
291
  # Button interactions
292
- submit_btn.click(fn=chatbot, inputs=[input_text, image_input, audio_input, openai_api_key, reasoning_effort, model_choice, input_mode, chat_history], outputs=[input_text, chat_history])
293
  clear_btn.click(fn=clear_history, inputs=[], outputs=[chat_history, chat_history])
294
 
295
- input_mode.change(toggle_inputs, inputs=[input_mode], outputs=[input_text, image_input, audio_input])
296
-
297
  return demo
298
 
299
  # Run the interface
300
  if __name__ == "__main__":
301
  demo = create_interface()
302
- demo.launch()
303
-
 
77
  return f"Error transcribing audio: {str(e)}"
78
 
79
  # The function that will be used by Gradio interface
80
def chatbot(input_text, image, audio, openai_api_key, reasoning_effort, model_choice, history=None):
    """Handle one chat turn for the Gradio interface.

    When ``audio`` is provided it is transcribed via ``transcribe_audio`` and
    the transcript replaces ``input_text``. The resulting text and the
    optional image are then sent to ``generate_response``, and the exchange is
    appended to the conversation history.

    Args:
        input_text: Text entered by the user. Overwritten by the transcript
            when ``audio`` is truthy.
        image: Optional image, forwarded unchanged to ``generate_response``.
        audio: Optional audio input, forwarded to ``transcribe_audio`` when
            truthy.
        openai_api_key: API key forwarded to both helper calls.
        reasoning_effort: Forwarded unchanged to ``generate_response``.
        model_choice: Forwarded unchanged to ``generate_response``.
        history: List of ``(user, assistant)`` string tuples accumulated so
            far. ``None`` (the default) starts a new, empty history.

    Returns:
        A ``("", history)`` tuple: an empty string for the text output and
        the history list with this turn appended.
    """
    # A literal ``[]`` default is created once at definition time and shared
    # by every call that omits ``history``, which would leak turns between
    # unrelated conversations. Use a None sentinel and build a fresh list.
    if history is None:
        history = []

    # If there's audio, transcribe it to text
    if audio:
        input_text = transcribe_audio(audio, openai_api_key)

    response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)

    # Append the response to the history (a caller-supplied list is updated in place)
    history.append((f"User: {input_text}", f"Assistant: {response}"))

    return "", history
91
 
 
238
  # Gradio interface setup
239
  def create_interface():
240
  with gr.Blocks(css=custom_css) as demo:
241
+ gr.Markdown("""
242
+ <div class="gradio-header">
243
+ <h1>Multimodal Chatbot (Text + Image + Voice)</h1>
244
+ <h3>Interact with a chatbot using text, image, or voice inputs</h3>
245
+ </div>
246
+ """)
247
 
248
+ # Add a description with an expandable accordion
249
+ with gr.Accordion("Click to expand for details", open=False):
250
+ gr.Markdown("""
251
+ ### Description:
252
+ This is a multimodal chatbot that can handle text, image, and voice inputs.
253
+ - You can ask questions or provide text, and the assistant will respond.
254
+ - You can also upload an image, and the assistant will process it and answer questions about the image.
255
+ - Voice input is supported: You can upload or record an audio file, and it will be transcribed to text and sent to the assistant.
256
+ - Enter your OpenAI API key to start interacting with the model.
257
+ - You can use the 'Clear History' button to remove the conversation history.
258
+ - "o1" is for image chat and "o3-mini" is for text chat.
259
+ ### Reasoning Effort:
260
+ The reasoning effort controls how complex or detailed the assistant's answers should be.
261
+ - **Low**: Provides quick, concise answers with minimal reasoning or details.
262
+ - **Medium**: Offers a balanced response with a reasonable level of detail and thought.
263
+ - **High**: Produces more detailed, analytical, or thoughtful responses, requiring deeper reasoning.
264
+ """)
265
 
266
  with gr.Row():
267
  openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)
268
 
269
+ with gr.Row():
270
+ image_input = gr.Image(label="Upload an Image", type="pil") # Image upload input
271
+ input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
272
+ audio_input = gr.Audio(label="Upload or Record Audio", type="filepath") # Audio upload or record input (using filepath)
273
 
274
  with gr.Row():
275
  reasoning_effort = gr.Dropdown(
 
280
  model_choice = gr.Dropdown(
281
  label="Select Model",
282
  choices=["o1", "o3-mini"],
283
+ value="o1" # Default to 'o1' for image-related tasks
284
  )
285
  submit_btn = gr.Button("Ask!", elem_id="submit-btn")
286
  clear_btn = gr.Button("Clear History", elem_id="clear-history")
287
 
288
  chat_history = gr.Chatbot()
289
 
 
 
 
 
 
 
 
 
 
290
  # Button interactions
291
+ submit_btn.click(fn=chatbot, inputs=[input_text, image_input, audio_input, openai_api_key, reasoning_effort, model_choice, chat_history], outputs=[input_text, chat_history])
292
  clear_btn.click(fn=clear_history, inputs=[], outputs=[chat_history, chat_history])
293
 
 
 
294
  return demo
295
 
296
  # Run the interface
if __name__ == "__main__":
    # Only build and serve the UI when this file is executed as a script,
    # not when it is imported as a module.
    demo = create_interface()
    demo.launch()