Update app.py
Browse files
app.py
CHANGED
@@ -210,13 +210,13 @@ custom_css = """
|
|
210 |
}
|
211 |
"""
|
212 |
|
213 |
-
# Gradio interface setup for multimodal chatbot
|
214 |
def create_interface():
|
215 |
with gr.Blocks(css=custom_css) as demo:
|
216 |
gr.Markdown("""
|
217 |
<div class="gradio-header">
|
218 |
-
<h1>Multimodal Chatbot (Text + Image)</h1>
|
219 |
-
<h3>Interact with a chatbot using text or image inputs</h3>
|
220 |
</div>
|
221 |
""")
|
222 |
|
@@ -224,9 +224,10 @@ def create_interface():
|
|
224 |
with gr.Accordion("Click to expand for details", open=False):
|
225 |
gr.Markdown("""
|
226 |
### Description:
|
227 |
-
This is a multimodal chatbot that can handle text and image inputs.
|
228 |
- You can ask questions or provide text, and the assistant will respond.
|
229 |
-
- You can upload an image, and the assistant will process it and answer questions about the image.
|
|
|
230 |
- Enter your OpenAI API key to start interacting with the model.
|
231 |
- You can use the 'Clear History' button to remove the conversation history.
|
232 |
- "o1" is for image chat and "o3-mini" is for text chat.
|
@@ -255,8 +256,13 @@ def create_interface():
|
|
255 |
choices=["o1", "o3-mini"],
|
256 |
value="o1" # Default to 'o1' for image-related tasks
|
257 |
)
|
258 |
-
|
259 |
-
|
|
|
|
|
|
|
|
|
|
|
260 |
|
261 |
chat_history = gr.Chatbot()
|
262 |
|
@@ -266,73 +272,6 @@ def create_interface():
|
|
266 |
|
267 |
return demo
|
268 |
|
269 |
-
# Voice interaction (audio chat) setup for Gradio
|
270 |
-
def voice_chat():
|
271 |
-
# Float feature initialization
|
272 |
-
float_init()
|
273 |
-
|
274 |
-
# Prompt for API key
|
275 |
-
api_key = get_api_key()
|
276 |
-
if not api_key:
|
277 |
-
gr.error("You must provide a valid OpenAI API Key to proceed.")
|
278 |
-
return
|
279 |
-
|
280 |
-
def initialize_session_state():
|
281 |
-
if "messages" not in gr.session_state:
|
282 |
-
gr.session_state.messages = [
|
283 |
-
{"role": "assistant", "content": "Hi! How may I assist you today? (Please Speak Clearly)"}
|
284 |
-
]
|
285 |
-
|
286 |
-
initialize_session_state()
|
287 |
-
|
288 |
-
gr.title("OpenAI Conversational Chatbot (Voice Interaction) 🤖")
|
289 |
-
|
290 |
-
# Footer container for the microphone
|
291 |
-
footer_container = gr.container()
|
292 |
-
|
293 |
-
with footer_container:
|
294 |
-
audio_bytes = audio_recorder()
|
295 |
-
|
296 |
-
for message in gr.session_state.messages:
|
297 |
-
with gr.chat_message(message["role"]):
|
298 |
-
gr.write(message["content"])
|
299 |
-
|
300 |
-
if audio_bytes:
|
301 |
-
# Write the audio bytes to a file
|
302 |
-
with gr.spinner("Transcribing..."):
|
303 |
-
webm_file_path = "temp_audio.mp3"
|
304 |
-
with open(webm_file_path, "wb") as f:
|
305 |
-
f.write(audio_bytes)
|
306 |
-
|
307 |
-
transcript = speech_to_text(webm_file_path)
|
308 |
-
if transcript:
|
309 |
-
gr.session_state.messages.append({"role": "user", "content": transcript})
|
310 |
-
with gr.chat_message("user"):
|
311 |
-
gr.write(transcript)
|
312 |
-
os.remove(webm_file_path)
|
313 |
-
|
314 |
-
if gr.session_state.messages[-1]["role"] != "assistant":
|
315 |
-
with gr.chat_message("assistant"):
|
316 |
-
with gr.spinner("Thinking🤔..."):
|
317 |
-
final_response = base_model_chatbot(gr.session_state.messages)
|
318 |
-
|
319 |
-
# Final check for punctuation and completeness
|
320 |
-
if not final_response.strip()[-1] in ".!?":
|
321 |
-
final_response += " This is the end of the response. Let me know if you need anything else."
|
322 |
-
|
323 |
-
with gr.spinner("Generating audio response..."):
|
324 |
-
audio_file = text_to_speech(final_response)
|
325 |
-
autoplay_audio(audio_file)
|
326 |
-
gr.write(final_response)
|
327 |
-
gr.session_state.messages.append({"role": "assistant", "content": final_response})
|
328 |
-
os.remove(audio_file)
|
329 |
-
|
330 |
-
# Float the footer container and provide CSS to target it with
|
331 |
-
footer_container.float("bottom: 0rem;")
|
332 |
-
|
333 |
if __name__ == "__main__":
|
334 |
demo = create_interface() # Gradio multimodal chatbot
|
335 |
-
demo.launch()
|
336 |
-
|
337 |
-
# Gradio voice chat
|
338 |
-
voice_chat()
|
|
|
210 |
}
|
211 |
"""
|
212 |
|
213 |
+
# Gradio interface setup for multimodal chatbot with voice functionality
|
214 |
def create_interface():
|
215 |
with gr.Blocks(css=custom_css) as demo:
|
216 |
gr.Markdown("""
|
217 |
<div class="gradio-header">
|
218 |
+
<h1>Multimodal Chatbot (Text + Image + Voice)</h1>
|
219 |
+
<h3>Interact with a chatbot using text, image, or voice inputs</h3>
|
220 |
</div>
|
221 |
""")
|
222 |
|
|
|
224 |
with gr.Accordion("Click to expand for details", open=False):
|
225 |
gr.Markdown("""
|
226 |
### Description:
|
227 |
+
This is a multimodal chatbot that can handle text, image, and voice inputs.
|
228 |
- You can ask questions or provide text, and the assistant will respond.
|
229 |
+
- You can upload an image, and the assistant will process it and answer questions about the image.
|
230 |
+
- You can also speak to the assistant, and it will process your speech.
|
231 |
- Enter your OpenAI API key to start interacting with the model.
|
232 |
- You can use the 'Clear History' button to remove the conversation history.
|
233 |
- "o1" is for image chat and "o3-mini" is for text chat.
|
|
|
256 |
choices=["o1", "o3-mini"],
|
257 |
value="o1" # Default to 'o1' for image-related tasks
|
258 |
)
|
259 |
+
|
260 |
+
# Audio input (voice interaction)
|
261 |
+
with gr.Row():
|
262 |
+
voice_input = gr.Audio(label="Speak to the Assistant", type="filepath")
|
263 |
+
|
264 |
+
submit_btn = gr.Button("Ask!", elem_id="submit-btn")
|
265 |
+
clear_btn = gr.Button("Clear History", elem_id="clear-history")
|
266 |
|
267 |
chat_history = gr.Chatbot()
|
268 |
|
|
|
272 |
|
273 |
return demo
|
274 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
275 |
if __name__ == "__main__":
|
276 |
demo = create_interface() # Gradio multimodal chatbot
|
277 |
+
demo.launch()
|
|
|
|
|
|