Update app.py
app.py CHANGED
```diff
@@ -77,20 +77,15 @@ def transcribe_audio(audio, openai_api_key):
         return f"Error transcribing audio: {str(e)}"
 
 # The function that will be used by Gradio interface
-def chatbot(input_text, image, audio, openai_api_key, reasoning_effort, model_choice, input_mode, history=[]):
+def chatbot(input_text, image, audio, openai_api_key, reasoning_effort, model_choice, history=[]):
     # If there's audio, transcribe it to text
-    if audio and input_mode == "Voice":
+    if audio:
         input_text = transcribe_audio(audio, openai_api_key)
 
-    # If Image Mode is selected
-    if input_mode == "Image" and image:
-        input_text = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
-    elif input_mode == "Text" and input_text:
-        # If Text Mode is selected
-        input_text = generate_response(input_text, None, openai_api_key, reasoning_effort, model_choice)
-
+    response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
+
     # Append the response to the history
-    history.append((f"User: {input_text}", f"Assistant: {input_text}"))
+    history.append((f"User: {input_text}", f"Assistant: {response}"))
 
     return "", history
```
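The new `chatbot` signature keeps `history=[]` as a default argument. In this app the default is effectively unused, because the click wiring further down always passes `chat_history` as the last input; still, a mutable default in Python is created once at definition time and shared across calls. A minimal sketch of the safer idiom (the names mirror the diff, but this snippet is illustrative, not the app's code):

```python
def chatbot_safe(input_text, history=None):
    # A fresh list per call; `history=[]` in the signature would be built once
    # at definition time and silently shared by every call that omits it.
    if history is None:
        history = []
    history.append((f"User: {input_text}", "Assistant: ..."))
    return "", history
```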
```diff
@@ -243,25 +238,38 @@ custom_css = """
 # Gradio interface setup
 def create_interface():
     with gr.Blocks(css=custom_css) as demo:
-        gr.Markdown("""
-        <…
+        gr.Markdown("""
+        <div class="gradio-header">
+            <h1>Multimodal Chatbot (Text + Image + Voice)</h1>
+            <h3>Interact with a chatbot using text, image, or voice inputs</h3>
+        </div>
+        """)
 
-        #…
+        # Add a description with an expandable accordion
+        with gr.Accordion("Click to expand for details", open=False):
+            gr.Markdown("""
+            ### Description:
+            This is a multimodal chatbot that can handle text, image, and voice inputs.
+            - You can ask questions or provide text, and the assistant will respond.
+            - You can also upload an image, and the assistant will process it and answer questions about the image.
+            - Voice input is supported: You can upload or record an audio file, and it will be transcribed to text and sent to the assistant.
+            - Enter your OpenAI API key to start interacting with the model.
+            - You can use the 'Clear History' button to remove the conversation history.
+            - "o1" is for image chat and "o3-mini" is for text chat.
+            ### Reasoning Effort:
+            The reasoning effort controls how complex or detailed the assistant's answers should be.
+            - **Low**: Provides quick, concise answers with minimal reasoning or details.
+            - **Medium**: Offers a balanced response with a reasonable level of detail and thought.
+            - **High**: Produces more detailed, analytical, or thoughtful responses, requiring deeper reasoning.
+            """)
 
         with gr.Row():
             openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)
 
-        …
+        with gr.Row():
+            image_input = gr.Image(label="Upload an Image", type="pil")  # Image upload input
+            input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
+            audio_input = gr.Audio(label="Upload or Record Audio", type="filepath")  # Audio upload or record input (using filepath)
 
         with gr.Row():
             reasoning_effort = gr.Dropdown(
```
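The accordion copy above documents the `reasoning_effort` dropdown (low/medium/high). `generate_response` itself is outside this diff, but as a rough sketch, the dropdown value would typically be forwarded to an o-series model through the Chat Completions `reasoning_effort` parameter; the helper name and exact call shape below are assumptions, not code from the app:

```python
from openai import OpenAI

def generate_response_sketch(prompt, api_key, reasoning_effort="medium", model="o3-mini"):
    # Hypothetical forwarding of the UI choices to the API:
    # `model` comes from the "Select Model" dropdown,
    # `reasoning_effort` ("low" | "medium" | "high") from the other dropdown.
    client = OpenAI(api_key=api_key)
    completion = client.chat.completions.create(
        model=model,
        reasoning_effort=reasoning_effort,
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content
```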
```diff
@@ -272,32 +280,20 @@ def create_interface():
             model_choice = gr.Dropdown(
                 label="Select Model",
                 choices=["o1", "o3-mini"],
-                value="o1"
+                value="o1"  # Default to 'o1' for image-related tasks
             )
             submit_btn = gr.Button("Ask!", elem_id="submit-btn")
             clear_btn = gr.Button("Clear History", elem_id="clear-history")
 
         chat_history = gr.Chatbot()
 
-        # Dynamically control the input visibility based on the selected mode
-        def toggle_inputs(input_mode):
-            if input_mode == "Text":
-                return input_text, None, None
-            elif input_mode == "Image":
-                return input_text, image_input, None
-            else:  # Voice
-                return None, None, audio_input
-
         # Button interactions
-        submit_btn.click(fn=chatbot, inputs=[input_text, image_input, audio_input, openai_api_key, reasoning_effort, model_choice, input_mode, chat_history], outputs=[input_text, chat_history])
+        submit_btn.click(fn=chatbot, inputs=[input_text, image_input, audio_input, openai_api_key, reasoning_effort, model_choice, chat_history], outputs=[input_text, chat_history])
         clear_btn.click(fn=clear_history, inputs=[], outputs=[chat_history, chat_history])
 
-        input_mode.change(toggle_inputs, inputs=[input_mode], outputs=[input_text, image_input, audio_input])
-
     return demo
 
 # Run the interface
 if __name__ == "__main__":
     demo = create_interface()
-    demo.launch()
-
+    demo.launch()
```
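With the mode selector gone, `toggle_inputs` and the `input_mode.change` listener are dropped and all three inputs stay visible. `submit_btn.click` now binds its `inputs` list positionally to `chatbot`'s parameters and writes the returned `("", history)` tuple back to `[input_text, chat_history]`, clearing the textbox and refreshing the chat. A self-contained toy showing the same wiring pattern (not the app itself):

```python
import gradio as gr

def echo(text, history):
    # `inputs=[box, chat]` binds positionally to (text, history);
    # the returned tuple is written back to `outputs` in order.
    history = (history or []) + [(f"User: {text}", f"Assistant: {text}")]
    return "", history  # "" clears the textbox; history refreshes the Chatbot

with gr.Blocks() as demo:
    box = gr.Textbox(label="Say something")
    chat = gr.Chatbot()
    gr.Button("Send").click(fn=echo, inputs=[box, chat], outputs=[box, chat])

if __name__ == "__main__":
    demo.launch()
```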