Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,8 @@ import openai
|
|
3 |
import base64
|
4 |
from PIL import Image
|
5 |
import io
|
|
|
|
|
6 |
|
7 |
# Function to send the request to OpenAI API with an image or text input
|
8 |
def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
|
@@ -49,8 +51,25 @@ def get_base64_string_from_image(pil_image):
|
|
49 |
base64_str = base64.b64encode(img_bytes).decode("utf-8")
|
50 |
return base64_str
|
51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
# The function that will be used by Gradio interface
|
53 |
-
def chatbot(input_text, image, openai_api_key, reasoning_effort, model_choice, history=[]):
|
|
|
|
|
|
|
|
|
54 |
response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
|
55 |
|
56 |
# Append the response to the history
|
@@ -98,7 +117,7 @@ custom_css = """
|
|
98 |
animation: fadeIn 2s ease-out;
|
99 |
}
|
100 |
/* Input field styles */
|
101 |
-
.gradio-textbox, .gradio-dropdown, .gradio-image {
|
102 |
border-radius: 8px;
|
103 |
border: 2px solid #ccc;
|
104 |
padding: 10px;
|
@@ -107,7 +126,7 @@ custom_css = """
|
|
107 |
font-size: 1rem;
|
108 |
transition: all 0.3s ease;
|
109 |
}
|
110 |
-
.gradio-textbox:focus, .gradio-dropdown:focus, .gradio-image:focus {
|
111 |
border-color: #007bff;
|
112 |
}
|
113 |
/* Button styles */
|
@@ -132,7 +151,6 @@ custom_css = """
|
|
132 |
#submit-btn:active {
|
133 |
transform: scale(0.95);
|
134 |
}
|
135 |
-
/* Clear History Button: Light Red */
|
136 |
#clear-history {
|
137 |
background-color: #f04e4e; /* Slightly Darker red */
|
138 |
color: white;
|
@@ -195,7 +213,7 @@ custom_css = """
|
|
195 |
.gradio-chatbot {
|
196 |
max-height: 400px;
|
197 |
}
|
198 |
-
.gradio-textbox, .gradio-dropdown, .gradio-image {
|
199 |
width: 100%;
|
200 |
}
|
201 |
#submit-btn, #clear-history {
|
@@ -210,8 +228,8 @@ def create_interface():
|
|
210 |
with gr.Blocks(css=custom_css) as demo:
|
211 |
gr.Markdown("""
|
212 |
<div class="gradio-header">
|
213 |
-
<h1>Multimodal Chatbot (Text + Image)</h1>
|
214 |
-
<h3>Interact with a chatbot using text or
|
215 |
</div>
|
216 |
""")
|
217 |
|
@@ -219,9 +237,10 @@ def create_interface():
|
|
219 |
with gr.Accordion("Click to expand for details", open=False):
|
220 |
gr.Markdown("""
|
221 |
### Description:
|
222 |
-
This is a multimodal chatbot that can handle
|
223 |
- You can ask questions or provide text, and the assistant will respond.
|
224 |
- You can also upload an image, and the assistant will process it and answer questions about the image.
|
|
|
225 |
- Enter your OpenAI API key to start interacting with the model.
|
226 |
- You can use the 'Clear History' button to remove the conversation history.
|
227 |
- "o1" is for image chat and "o3-mini" is for text chat.
|
@@ -238,6 +257,7 @@ def create_interface():
|
|
238 |
with gr.Row():
|
239 |
image_input = gr.Image(label="Upload an Image", type="pil") # Image upload input
|
240 |
input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
|
|
|
241 |
|
242 |
with gr.Row():
|
243 |
reasoning_effort = gr.Dropdown(
|
@@ -256,7 +276,7 @@ def create_interface():
|
|
256 |
chat_history = gr.Chatbot()
|
257 |
|
258 |
# Button interactions
|
259 |
-
submit_btn.click(fn=chatbot, inputs=[input_text, image_input, openai_api_key, reasoning_effort, model_choice, chat_history], outputs=[input_text, chat_history])
|
260 |
clear_btn.click(fn=clear_history, inputs=[], outputs=[chat_history, chat_history])
|
261 |
|
262 |
return demo
|
|
|
3 |
import base64
|
4 |
from PIL import Image
|
5 |
import io
|
6 |
+
import openai
|
7 |
+
import os
|
8 |
|
9 |
# Function to send the request to OpenAI API with an image or text input
|
10 |
def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
|
|
|
51 |
base64_str = base64.b64encode(img_bytes).decode("utf-8")
|
52 |
return base64_str
|
53 |
|
54 |
+
# Function to transcribe audio to text using OpenAI Whisper API
|
55 |
+
def transcribe_audio(audio, openai_api_key):
    """Transcribe an audio clip to text using the OpenAI Whisper API.

    Args:
        audio: Path to an audio file (``str`` or ``os.PathLike``), or an
            already-open binary file object that is passed to the SDK as-is.
        openai_api_key: API key used for the request.

    Returns:
        The transcribed text on success. On failure, a human-readable
        message beginning with "Error" (no exception is raised to the caller).
    """
    if not openai_api_key:
        return "Error: No API key provided."
    # Guard against a missing clip so we fail with a clear message instead of
    # an opaque SDK/IO error.
    if audio is None:
        return "Error: No audio provided."

    openai.api_key = openai_api_key
    try:
        # The module-level (legacy) SDK exposes Whisper as
        # Audio.transcribe(model, file); it needs a binary file object, so a
        # path is opened here and closed again once the request finishes.
        if isinstance(audio, (str, os.PathLike)):
            with open(audio, "rb") as audio_file:
                transcript = openai.Audio.transcribe("whisper-1", audio_file)
        else:
            transcript = openai.Audio.transcribe("whisper-1", audio)
        return transcript["text"]
    except Exception as e:
        # Best-effort boundary: the UI shows the message rather than crashing.
        return f"Error transcribing audio: {str(e)}"
|
66 |
+
|
67 |
# The function that will be used by Gradio interface
|
68 |
+
def chatbot(input_text, image, audio, openai_api_key, reasoning_effort, model_choice, history=[]):
|
69 |
+
# If there's audio, transcribe it to text
|
70 |
+
if audio:
|
71 |
+
input_text = transcribe_audio(audio, openai_api_key)
|
72 |
+
|
73 |
response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
|
74 |
|
75 |
# Append the response to the history
|
|
|
117 |
animation: fadeIn 2s ease-out;
|
118 |
}
|
119 |
/* Input field styles */
|
120 |
+
.gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio {
|
121 |
border-radius: 8px;
|
122 |
border: 2px solid #ccc;
|
123 |
padding: 10px;
|
|
|
126 |
font-size: 1rem;
|
127 |
transition: all 0.3s ease;
|
128 |
}
|
129 |
+
.gradio-textbox:focus, .gradio-dropdown:focus, .gradio-image:focus, .gradio-audio:focus {
|
130 |
border-color: #007bff;
|
131 |
}
|
132 |
/* Button styles */
|
|
|
151 |
#submit-btn:active {
|
152 |
transform: scale(0.95);
|
153 |
}
|
|
|
154 |
#clear-history {
|
155 |
background-color: #f04e4e; /* Slightly Darker red */
|
156 |
color: white;
|
|
|
213 |
.gradio-chatbot {
|
214 |
max-height: 400px;
|
215 |
}
|
216 |
+
.gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio {
|
217 |
width: 100%;
|
218 |
}
|
219 |
#submit-btn, #clear-history {
|
|
|
228 |
with gr.Blocks(css=custom_css) as demo:
|
229 |
gr.Markdown("""
|
230 |
<div class="gradio-header">
|
231 |
+
<h1>Multimodal Chatbot (Text + Image + Voice)</h1>
|
232 |
+
<h3>Interact with a chatbot using text, image, or voice inputs</h3>
|
233 |
</div>
|
234 |
""")
|
235 |
|
|
|
237 |
with gr.Accordion("Click to expand for details", open=False):
|
238 |
gr.Markdown("""
|
239 |
### Description:
|
240 |
+
This is a multimodal chatbot that can handle text, image, and voice inputs.
|
241 |
- You can ask questions or provide text, and the assistant will respond.
|
242 |
- You can also upload an image, and the assistant will process it and answer questions about the image.
|
243 |
+
- Voice input is supported: You can upload or record an audio file, and it will be transcribed to text and sent to the assistant.
|
244 |
- Enter your OpenAI API key to start interacting with the model.
|
245 |
- You can use the 'Clear History' button to remove the conversation history.
|
246 |
- "o1" is for image chat and "o3-mini" is for text chat.
|
|
|
257 |
with gr.Row():
|
258 |
image_input = gr.Image(label="Upload an Image", type="pil") # Image upload input
|
259 |
input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
|
260 |
+
audio_input = gr.Audio(label="Upload or Record Audio", type="file") # Audio upload or record input
|
261 |
|
262 |
with gr.Row():
|
263 |
reasoning_effort = gr.Dropdown(
|
|
|
276 |
chat_history = gr.Chatbot()
|
277 |
|
278 |
# Button interactions
|
279 |
+
submit_btn.click(fn=chatbot, inputs=[input_text, image_input, audio_input, openai_api_key, reasoning_effort, model_choice, chat_history], outputs=[input_text, chat_history])
|
280 |
clear_btn.click(fn=clear_history, inputs=[], outputs=[chat_history, chat_history])
|
281 |
|
282 |
return demo
|