Update app.py
Browse files
app.py
CHANGED
@@ -238,91 +238,31 @@ custom_css = """
|
|
238 |
# Gradio interface setup
|
239 |
def create_interface():
|
240 |
with gr.Blocks(css=custom_css) as demo:
|
241 |
-
gr.Markdown("""
|
242 |
-
<div class="gradio-header">
|
243 |
-
<h1>Multimodal Chatbot (Text + Image + Voice)</h1>
|
244 |
-
<h3>Interact with a chatbot using text, image, or voice inputs</h3>
|
245 |
-
</div>
|
246 |
-
""")
|
247 |
-
|
248 |
-
# Add a description with an expandable accordion
|
249 |
-
with gr.Accordion("Click to expand for details", open=False):
|
250 |
-
gr.Markdown("""
|
251 |
-
### Description:
|
252 |
-
This is a multimodal chatbot that can handle text, image, and voice inputs.
|
253 |
-
- You can ask questions or provide text, and the assistant will respond.
|
254 |
-
- You can also upload an image, and the assistant will process it and answer questions about the image.
|
255 |
-
- Voice input is supported: You can upload or record an audio file, and it will be transcribed to text and sent to the assistant.
|
256 |
-
- Enter your OpenAI API key to start interacting with the model.
|
257 |
-
- You can use the 'Clear History' button to remove the conversation history.
|
258 |
-
- "o1" is for image chat and "o3-mini" is for text chat.
|
259 |
-
### Reasoning Effort:
|
260 |
-
The reasoning effort controls how complex or detailed the assistant's answers should be.
|
261 |
-
- **Low**: Provides quick, concise answers with minimal reasoning or details.
|
262 |
-
- **Medium**: Offers a balanced response with a reasonable level of detail and thought.
|
263 |
-
- **High**: Produces more detailed, analytical, or thoughtful responses, requiring deeper reasoning.
|
264 |
-
""")
|
265 |
|
266 |
-
with gr.
|
267 |
-
|
268 |
-
|
|
|
|
|
269 |
input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
|
270 |
-
|
271 |
-
label="Reasoning Effort",
|
272 |
-
choices=["low", "medium", "high"],
|
273 |
-
value="medium"
|
274 |
-
)
|
275 |
-
model_choice = gr.Dropdown(
|
276 |
-
label="Select Model",
|
277 |
-
choices=["o1", "o3-mini"],
|
278 |
-
value="o3-mini" # Default to 'o3-mini' for text-based tasks
|
279 |
-
)
|
280 |
-
submit_btn = gr.Button("Ask!", elem_id="submit-btn")
|
281 |
chat_history = gr.Chatbot()
|
|
|
282 |
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
with gr.Tab("Image Chat"):
|
287 |
-
image_input = gr.Image(label="Upload an Image", type="pil") # Image upload input
|
288 |
-
reasoning_effort = gr.Dropdown(
|
289 |
-
label="Reasoning Effort",
|
290 |
-
choices=["low", "medium", "high"],
|
291 |
-
value="medium"
|
292 |
-
)
|
293 |
-
model_choice = gr.Dropdown(
|
294 |
-
label="Select Model",
|
295 |
-
choices=["o1", "o3-mini"],
|
296 |
-
value="o1" # Default to 'o1' for image-related tasks
|
297 |
-
)
|
298 |
-
submit_btn = gr.Button("Ask!", elem_id="submit-btn")
|
299 |
chat_history = gr.Chatbot()
|
|
|
300 |
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
with gr.Tab("Voice Chat"):
|
305 |
-
audio_input = gr.Audio(label="Upload or Record Audio", type="filepath") # Audio upload or record input (using filepath)
|
306 |
-
reasoning_effort = gr.Dropdown(
|
307 |
-
label="Reasoning Effort",
|
308 |
-
choices=["low", "medium", "high"],
|
309 |
-
value="medium"
|
310 |
-
)
|
311 |
-
model_choice = gr.Dropdown(
|
312 |
-
label="Select Model",
|
313 |
-
choices=["o1", "o3-mini"],
|
314 |
-
value="o3-mini" # Default to 'o3-mini' for voice-related tasks
|
315 |
-
)
|
316 |
-
submit_btn = gr.Button("Ask!", elem_id="submit-btn")
|
317 |
chat_history = gr.Chatbot()
|
|
|
318 |
|
319 |
-
|
320 |
-
|
321 |
-
# Clear history button
|
322 |
-
clear_btn = gr.Button("Clear History", elem_id="clear-history")
|
323 |
-
clear_btn.click(fn=clear_history, inputs=[], outputs=[chat_history, chat_history])
|
324 |
-
|
325 |
-
return demo
|
326 |
|
327 |
# Run the interface
|
328 |
if __name__ == "__main__":
|
|
|
238 |
# Gradio interface setup
|
239 |
def create_interface():
    """Build the Gradio Blocks UI with Text, Image and Voice chat tabs.

    Each tab owns its input widget, an "Ask!" button, a chat window and one
    extra textbox, and forwards a click to ``chatbot`` with seven positional
    arguments. The constant arguments (``None`` placeholders, ``"medium"``
    and the model name) are bound in a small wrapper function because
    Gradio event listeners only accept components in ``inputs``.

    Returns:
        The assembled ``gr.Blocks`` app; launching it is left to the caller.
    """
    with gr.Blocks(css=custom_css) as demo:
        gr.Markdown("""<div class="gradio-header"><h1>Multimodal Chatbot (Text + Image + Voice)</h1><h3>Interact with a chatbot using text, image, or voice inputs</h3></div>""")

        with gr.Accordion("Click to expand for details", open=False):
            gr.Markdown("""### Description: This is a multimodal chatbot that can handle text, image, and voice inputs. [Explanation Here]""")

        # gr.Tabs is the layout container for TabItem children.
        # gr.TabbedInterface is a ready-made app built from a list of
        # interfaces and cannot be opened as an empty context manager.
        with gr.Tabs():
            with gr.TabItem("Text Chat"):
                input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
                text_submit = gr.Button("Ask!")
                text_history = gr.Chatbot()
                # NOTE(review): fourth chatbot argument; presumably the
                # OpenAI API key -- confirm against chatbot's signature.
                text_extra = gr.Textbox()
                text_submit.click(
                    fn=lambda text, extra, history: chatbot(
                        text, None, None, extra, "medium", "o3-mini", history
                    ),
                    inputs=[input_text, text_extra, text_history],
                    outputs=[input_text, text_history],
                )

            with gr.TabItem("Image Chat"):
                image_input = gr.Image(label="Upload an Image", type="pil")
                image_submit = gr.Button("Ask!")
                image_history = gr.Chatbot()
                # NOTE(review): same fourth-argument textbox as the text tab.
                image_extra = gr.Textbox()
                # NOTE(review): the first output is the Text tab's textbox,
                # as in the original wiring -- confirm this is intended.
                image_submit.click(
                    fn=lambda image, extra, history: chatbot(
                        None, image, None, extra, "medium", "o1", history
                    ),
                    inputs=[image_input, image_extra, image_history],
                    outputs=[input_text, image_history],
                )

            with gr.TabItem("Voice Chat"):
                audio_input = gr.Audio(label="Upload or Record Audio", type="filepath")
                voice_submit = gr.Button("Ask!")
                voice_history = gr.Chatbot()
                # NOTE(review): same fourth-argument textbox as the text tab.
                voice_extra = gr.Textbox()
                # NOTE(review): the first output is the Text tab's textbox,
                # as in the original wiring -- confirm this is intended.
                voice_submit.click(
                    fn=lambda audio, extra, history: chatbot(
                        None, None, audio, extra, "medium", "o3-mini", history
                    ),
                    inputs=[audio_input, voice_extra, voice_history],
                    outputs=[input_text, voice_history],
                )

    return demo
|
|
|
|
|
|
|
|
|
|
|
|
|
266 |
|
267 |
# Run the interface
|
268 |
if __name__ == "__main__":
|