Commit: "Update app.py"
File changed: app.py
@@ -1,13 +1,8 @@
|
|
1 |
import gradio as gr
|
2 |
import openai
|
3 |
import base64
|
4 |
-
import
|
5 |
import io
|
6 |
-
from helpers import text_to_speech, autoplay_audio, speech_to_text, get_api_key
|
7 |
-
from generate_answer import base_model_chatbot, with_pdf_chatbot
|
8 |
-
from audio_recorder_streamlit import audio_recorder
|
9 |
-
from streamlit_float import *
|
10 |
-
from PIL import Image as stImage
|
11 |
|
12 |
# Function to send the request to OpenAI API with an image or text input
|
13 |
def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
|
@@ -16,28 +11,29 @@ def generate_response(input_text, image, openai_api_key, reasoning_effort="mediu
|
|
16 |
|
17 |
openai.api_key = openai_api_key
|
18 |
|
19 |
-
#
|
20 |
if image:
|
21 |
# Convert the image to base64 string
|
22 |
image_info = get_base64_string_from_image(image)
|
23 |
input_text = f"data:image/png;base64,{image_info}"
|
24 |
|
25 |
-
#
|
26 |
-
if
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
messages = [
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
try:
|
36 |
# Call OpenAI API with the selected model
|
37 |
response = openai.ChatCompletion.create(
|
38 |
model=model_choice, # Dynamically choose the model (o1 or o3-mini)
|
39 |
messages=messages,
|
40 |
-
|
|
|
41 |
)
|
42 |
|
43 |
return response["choices"][0]["message"]["content"]
|
@@ -54,11 +50,7 @@ def get_base64_string_from_image(pil_image):
|
|
54 |
return base64_str
|
55 |
|
56 |
# The function that will be used by Gradio interface
|
57 |
-
def chatbot(input_text, image,
|
58 |
-
# If voice_audio is provided, convert it to text
|
59 |
-
if voice_audio:
|
60 |
-
input_text = speech_to_text(voice_audio) # Convert speech to text
|
61 |
-
|
62 |
response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
|
63 |
|
64 |
# Append the response to the history
|
@@ -140,6 +132,7 @@ custom_css = """
|
|
140 |
#submit-btn:active {
|
141 |
transform: scale(0.95);
|
142 |
}
|
|
|
143 |
#clear-history {
|
144 |
background-color: #f04e4e; /* Slightly Darker red */
|
145 |
color: white;
|
@@ -212,13 +205,13 @@ custom_css = """
|
|
212 |
}
|
213 |
"""
|
214 |
|
215 |
-
# Gradio interface setup
|
216 |
def create_interface():
|
217 |
with gr.Blocks(css=custom_css) as demo:
|
218 |
gr.Markdown("""
|
219 |
<div class="gradio-header">
|
220 |
-
<h1>Multimodal Chatbot (Text + Image
|
221 |
-
<h3>Interact with a chatbot using text
|
222 |
</div>
|
223 |
""")
|
224 |
|
@@ -226,10 +219,9 @@ def create_interface():
|
|
226 |
with gr.Accordion("Click to expand for details", open=False):
|
227 |
gr.Markdown("""
|
228 |
### Description:
|
229 |
-
This is a multimodal chatbot that can handle text
|
230 |
- You can ask questions or provide text, and the assistant will respond.
|
231 |
-
- You can upload an image, and the assistant will process it and answer questions about the image.
|
232 |
-
- You can also speak to the assistant, and it will process your speech.
|
233 |
- Enter your OpenAI API key to start interacting with the model.
|
234 |
- You can use the 'Clear History' button to remove the conversation history.
|
235 |
- "o1" is for image chat and "o3-mini" is for text chat.
|
@@ -258,22 +250,18 @@ def create_interface():
|
|
258 |
choices=["o1", "o3-mini"],
|
259 |
value="o1" # Default to 'o1' for image-related tasks
|
260 |
)
|
261 |
-
|
262 |
-
|
263 |
-
with gr.Row():
|
264 |
-
voice_input = gr.Audio(label="Speak to the Assistant", type="filepath")
|
265 |
-
|
266 |
-
submit_btn = gr.Button("Ask!", elem_id="submit-btn")
|
267 |
-
clear_btn = gr.Button("Clear History", elem_id="clear-history")
|
268 |
|
269 |
chat_history = gr.Chatbot()
|
270 |
|
271 |
# Button interactions
|
272 |
-
submit_btn.click(fn=chatbot, inputs=[input_text, image_input,
|
273 |
clear_btn.click(fn=clear_history, inputs=[], outputs=[chat_history, chat_history])
|
274 |
|
275 |
return demo
|
276 |
|
|
|
277 |
if __name__ == "__main__":
|
278 |
-
demo = create_interface()
|
279 |
-
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import openai
|
3 |
import base64
|
4 |
+
from PIL import Image
|
5 |
import io
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
# Function to send the request to OpenAI API with an image or text input
|
8 |
def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
|
|
|
11 |
|
12 |
openai.api_key = openai_api_key
|
13 |
|
14 |
+
# Process the input depending on whether it's text or an image
|
15 |
if image:
|
16 |
# Convert the image to base64 string
|
17 |
image_info = get_base64_string_from_image(image)
|
18 |
input_text = f"data:image/png;base64,{image_info}"
|
19 |
|
20 |
+
# Prepare the messages for OpenAI API
|
21 |
+
if model_choice == "o1":
|
22 |
+
messages = [
|
23 |
+
{"role": "user", "content": [{"type": "image_url", "image_url": {"url": input_text}}]}
|
24 |
+
]
|
25 |
+
elif model_choice == "o3-mini":
|
26 |
+
messages = [
|
27 |
+
{"role": "user", "content": [{"type": "text", "text": input_text}]}
|
28 |
+
]
|
29 |
+
|
30 |
try:
|
31 |
# Call OpenAI API with the selected model
|
32 |
response = openai.ChatCompletion.create(
|
33 |
model=model_choice, # Dynamically choose the model (o1 or o3-mini)
|
34 |
messages=messages,
|
35 |
+
reasoning_effort=reasoning_effort, # Set reasoning_effort for the response
|
36 |
+
max_completion_tokens=2000 # Limit response tokens to 2000
|
37 |
)
|
38 |
|
39 |
return response["choices"][0]["message"]["content"]
|
|
|
50 |
return base64_str
|
51 |
|
52 |
# The function that will be used by Gradio interface
|
53 |
+
def chatbot(input_text, image, openai_api_key, reasoning_effort, model_choice, history=[]):
|
|
|
|
|
|
|
|
|
54 |
response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
|
55 |
|
56 |
# Append the response to the history
|
|
|
132 |
#submit-btn:active {
|
133 |
transform: scale(0.95);
|
134 |
}
|
135 |
+
/* Clear History Button: Light Red */
|
136 |
#clear-history {
|
137 |
background-color: #f04e4e; /* Slightly Darker red */
|
138 |
color: white;
|
|
|
205 |
}
|
206 |
"""
|
207 |
|
208 |
+
# Gradio interface setup
|
209 |
def create_interface():
|
210 |
with gr.Blocks(css=custom_css) as demo:
|
211 |
gr.Markdown("""
|
212 |
<div class="gradio-header">
|
213 |
+
<h1>Multimodal Chatbot (Text + Image)</h1>
|
214 |
+
<h3>Interact with a chatbot using text or image inputs</h3>
|
215 |
</div>
|
216 |
""")
|
217 |
|
|
|
219 |
with gr.Accordion("Click to expand for details", open=False):
|
220 |
gr.Markdown("""
|
221 |
### Description:
|
222 |
+
This is a multimodal chatbot that can handle both text and image inputs.
|
223 |
- You can ask questions or provide text, and the assistant will respond.
|
224 |
+
- You can also upload an image, and the assistant will process it and answer questions about the image.
|
|
|
225 |
- Enter your OpenAI API key to start interacting with the model.
|
226 |
- You can use the 'Clear History' button to remove the conversation history.
|
227 |
- "o1" is for image chat and "o3-mini" is for text chat.
|
|
|
250 |
choices=["o1", "o3-mini"],
|
251 |
value="o1" # Default to 'o1' for image-related tasks
|
252 |
)
|
253 |
+
submit_btn = gr.Button("Ask!", elem_id="submit-btn")
|
254 |
+
clear_btn = gr.Button("Clear History", elem_id="clear-history")
|
|
|
|
|
|
|
|
|
|
|
255 |
|
256 |
chat_history = gr.Chatbot()
|
257 |
|
258 |
# Button interactions
|
259 |
+
submit_btn.click(fn=chatbot, inputs=[input_text, image_input, openai_api_key, reasoning_effort, model_choice, chat_history], outputs=[input_text, chat_history])
|
260 |
clear_btn.click(fn=clear_history, inputs=[], outputs=[chat_history, chat_history])
|
261 |
|
262 |
return demo
|
263 |
|
264 |
+
# Run the interface
|
265 |
if __name__ == "__main__":
|
266 |
+
demo = create_interface()
|
267 |
+
demo.launch()
|