"""Multimodal chatbot demo.

Combines two independent front ends: a Gradio text + image chat UI
(`create_interface`) and a Streamlit voice chat (`voice_chat`). Run the
Gradio app with `python` and the Streamlit app with `streamlit run`.
"""

import base64
import io
import os

import gradio as gr
import openai
import streamlit as st
from PIL import Image

from audio_recorder_streamlit import audio_recorder
from generate_answer import base_model_chatbot, with_pdf_chatbot
from helpers import text_to_speech, autoplay_audio, speech_to_text, get_api_key
from streamlit_float import *  # provides float_init() and the container .float() helper


def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
    """Send the user's question (and optional image) to the chosen model."""
    if not openai_api_key:
        return "Error: No API key provided."

    # Requires openai>=1.0; reasoning_effort is only accepted by the v1 client API.
    client = openai.OpenAI(api_key=openai_api_key)

    # "o1" accepts image input; "o3-mini" is text-only, so an uploaded image
    # is simply ignored for that model.
    content = [{"type": "text", "text": input_text}] if input_text else []
    if image and model_choice == "o1":
        image_b64 = get_base64_string_from_image(image)
        content.append({"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}})
    if not content:
        return "Error: Provide a question, an image, or both."

    messages = [{"role": "user", "content": content}]

    try:
        response = client.chat.completions.create(
            model=model_choice,
            messages=messages,
            reasoning_effort=reasoning_effort,
            max_completion_tokens=2000,
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error calling OpenAI API: {e}"


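# Usage sketch (hypothetical key and reply, not part of the app flow); only
# the call signature is fixed by the function above:
#
#     >>> generate_response("What is 2 + 2?", None, "sk-...", "low", "o3-mini")
#     '2 + 2 equals 4.'

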
def get_base64_string_from_image(pil_image):
    """Encode a PIL image as a base64 PNG string (no data-URL prefix)."""
    buffered = io.BytesIO()
    pil_image.save(buffered, format="PNG")
    img_bytes = buffered.getvalue()
    return base64.b64encode(img_bytes).decode("utf-8")


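# Quick sanity check (a sketch, not part of the app): a PNG always starts
# with the bytes \x89PNG, so the base64 string begins with "iVBOR".
#
#     >>> get_base64_string_from_image(Image.new("RGB", (1, 1)))[:5]
#     'iVBOR'

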
def chatbot(input_text, image, openai_api_key, reasoning_effort, model_choice, history=None):
    """Generate a reply and append the exchange to the chat history."""
    # Use None instead of a mutable default list, which would be shared
    # across calls; Gradio passes the current Chatbot state in anyway.
    history = history or []
    response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
    history.append((f"User: {input_text}", f"Assistant: {response}"))
    return "", history


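# gr.Chatbot renders the history as (user, assistant) string pairs, e.g.
# [("User: hi", "Assistant: Hello!")]; the "User:"/"Assistant:" prefixes
# added above are purely cosmetic.

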
def clear_history():
    return "", []


custom_css = """ |
|
/* General body styles */ |
|
.gradio-container { |
|
font-family: 'Arial', sans-serif; |
|
background-color: #f8f9fa; |
|
color: #333; |
|
} |
|
/* Header styles */ |
|
.gradio-header { |
|
background-color: #007bff; |
|
color: white; |
|
padding: 20px; |
|
text-align: center; |
|
border-radius: 8px; |
|
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); |
|
animation: fadeIn 1s ease-out; |
|
} |
|
.gradio-header h1 { |
|
font-size: 2.5rem; |
|
} |
|
.gradio-header h3 { |
|
font-size: 1.2rem; |
|
margin-top: 10px; |
|
} |
|
/* Chatbot container styles */ |
|
.gradio-chatbot { |
|
background-color: #fff; |
|
border-radius: 10px; |
|
padding: 20px; |
|
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); |
|
max-height: 500px; |
|
overflow-y: auto; |
|
animation: fadeIn 2s ease-out; |
|
} |
|
/* Input field styles */ |
|
.gradio-textbox, .gradio-dropdown, .gradio-image { |
|
border-radius: 8px; |
|
border: 2px solid #ccc; |
|
padding: 10px; |
|
margin-bottom: 10px; |
|
width: 100%; |
|
font-size: 1rem; |
|
transition: all 0.3s ease; |
|
} |
|
.gradio-textbox:focus, .gradio-dropdown:focus, .gradio-image:focus { |
|
border-color: #007bff; |
|
} |
|
/* Button styles */ |
|
/* Send Button: Sky Blue */ |
|
#submit-btn { |
|
background-color: #00aaff; /* Sky blue */ |
|
color: white; |
|
border: none; |
|
border-radius: 8px; |
|
padding: 10px 19px; |
|
font-size: 1.1rem; |
|
cursor: pointer; |
|
transition: all 0.3s ease; |
|
margin-left: auto; |
|
margin-right: auto; |
|
display: block; |
|
margin-top: 10px; |
|
} |
|
#submit-btn:hover { |
|
background-color: #0099cc; /* Slightly darker blue */ |
|
} |
|
#submit-btn:active { |
|
transform: scale(0.95); |
|
} |
|
#clear-history { |
|
background-color: #f04e4e; /* Slightly Darker red */ |
|
color: white; |
|
border: none; |
|
border-radius: 8px; |
|
padding: 10px 13px; |
|
font-size: 1.1rem; |
|
cursor: pointer; |
|
transition: all 0.3s ease; |
|
margin-top: 10px; |
|
} |
|
#clear-history:hover { |
|
background-color: #f5a4a4; /* Light red */ |
|
} |
|
#clear-history:active { |
|
transform: scale(0.95); |
|
} |
|
/* Chat history styles */ |
|
.gradio-chatbot .message { |
|
margin-bottom: 10px; |
|
} |
|
.gradio-chatbot .user { |
|
background-color: #007bff; |
|
color: white; |
|
padding: 10px; |
|
border-radius: 12px; |
|
max-width: 70%; |
|
animation: slideInUser 0.5s ease-out; |
|
} |
|
.gradio-chatbot .assistant { |
|
background-color: #f1f1f1; |
|
color: #333; |
|
padding: 10px; |
|
border-radius: 12px; |
|
max-width: 70%; |
|
margin-left: auto; |
|
animation: slideInAssistant 0.5s ease-out; |
|
} |
|
/* Animation keyframes */ |
|
@keyframes fadeIn { |
|
0% { opacity: 0; } |
|
100% { opacity: 1; } |
|
} |
|
@keyframes slideInUser { |
|
0% { transform: translateX(-100%); } |
|
100% { transform: translateX(0); } |
|
} |
|
@keyframes slideInAssistant { |
|
0% { transform: translateX(100%); } |
|
100% { transform: translateX(0); } |
|
} |
|
/* Mobile responsiveness */ |
|
@media (max-width: 768px) { |
|
.gradio-header h1 { |
|
font-size: 1.8rem; |
|
} |
|
.gradio-header h3 { |
|
font-size: 1rem; |
|
} |
|
.gradio-chatbot { |
|
max-height: 400px; |
|
} |
|
.gradio-textbox, .gradio-dropdown, .gradio-image { |
|
width: 100%; |
|
} |
|
#submit-btn, #clear-history { |
|
width: 100%; |
|
margin-left: 0; |
|
} |
|
} |
|
""" |
|
|
|
|
|
def create_interface():
    """Build the Gradio Blocks UI for the text + image chatbot."""
    with gr.Blocks(css=custom_css) as demo:
        gr.Markdown("""
        <div class="gradio-header">
            <h1>Multimodal Chatbot (Text + Image)</h1>
            <h3>Interact with a chatbot using text or image inputs</h3>
        </div>
        """)

        with gr.Accordion("Click to expand for details", open=False):
            gr.Markdown("""
            ### Description:
            This is a multimodal chatbot that can handle both text and image inputs.
            - Ask questions or provide text, and the assistant will respond.
            - Upload an image, and the assistant will answer questions about it.
            - Enter your OpenAI API key to start interacting with the model.
            - Use the 'Clear History' button to remove the conversation history.
            - "o1" is for image chat and "o3-mini" is for text chat.

            ### Reasoning Effort:
            The reasoning effort controls how complex or detailed the assistant's answers should be.
            - **Low**: Quick, concise answers with minimal reasoning or detail.
            - **Medium**: A balanced response with a reasonable level of detail and thought.
            - **High**: More detailed, analytical, or thoughtful responses requiring deeper reasoning.
            """)

        with gr.Row():
            openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)

        with gr.Row():
            image_input = gr.Image(label="Upload an Image", type="pil")
            input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)

        with gr.Row():
            reasoning_effort = gr.Dropdown(
                label="Reasoning Effort",
                choices=["low", "medium", "high"],
                value="medium"
            )
            model_choice = gr.Dropdown(
                label="Select Model",
                choices=["o1", "o3-mini"],
                value="o1"
            )
        submit_btn = gr.Button("Ask!", elem_id="submit-btn")
        clear_btn = gr.Button("Clear History", elem_id="clear-history")

        chat_history = gr.Chatbot()

        submit_btn.click(
            fn=chatbot,
            inputs=[input_text, image_input, openai_api_key, reasoning_effort, model_choice, chat_history],
            outputs=[input_text, chat_history],
        )
        # clear_history returns ("", []): the first output clears the textbox,
        # the second clears the chat history.
        clear_btn.click(fn=clear_history, inputs=[], outputs=[input_text, chat_history])

    return demo


def voice_chat():
    """Streamlit voice chat: record audio, transcribe it, reply, and speak the reply."""
    float_init()

    api_key = get_api_key()
    if not api_key:
        st.error("You must provide a valid OpenAI API Key to proceed.")
        return

    def initialize_session_state():
        if "messages" not in st.session_state:
            st.session_state.messages = [
                {"role": "assistant", "content": "Hi! How may I assist you today? (Please Speak Clearly)"}
            ]

    initialize_session_state()

    st.title("OpenAI Conversational Chatbot (Voice Interaction) 🤖")

    # Keep the recorder pinned to the bottom of the page via streamlit-float.
    footer_container = st.container()

    with footer_container:
        audio_bytes = audio_recorder()

    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])

    if audio_bytes:
        with st.spinner("Transcribing..."):
            # The recorder returns raw audio bytes; write them to a temporary
            # file for the transcription helper.
            temp_audio_path = "temp_audio.mp3"
            with open(temp_audio_path, "wb") as f:
                f.write(audio_bytes)

            transcript = speech_to_text(temp_audio_path)
            if transcript:
                st.session_state.messages.append({"role": "user", "content": transcript})
                with st.chat_message("user"):
                    st.write(transcript)
            os.remove(temp_audio_path)

    if st.session_state.messages[-1]["role"] != "assistant":
        with st.chat_message("assistant"):
            with st.spinner("Thinking🤔..."):
                final_response = base_model_chatbot(st.session_state.messages)

            # Guard against an empty reply before checking the last character.
            if final_response.strip() and final_response.strip()[-1] not in ".!?":
                final_response += " This is the end of the response. Let me know if you need anything else."

            with st.spinner("Generating audio response..."):
                audio_file = text_to_speech(final_response)
                autoplay_audio(audio_file)
            st.write(final_response)
            st.session_state.messages.append({"role": "assistant", "content": final_response})
            os.remove(audio_file)

    footer_container.float("bottom: 0rem;")


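# voice_chat() is a Streamlit script, not a Gradio app: it only works under
# the Streamlit runtime, e.g. (assuming this file is saved as app.py):
#
#     streamlit run app.py

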
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()

    # The Streamlit voice chat cannot run in this process: demo.launch()
    # blocks, and st.* calls require the Streamlit runtime. Launch it
    # separately with `streamlit run` instead.
    # voice_chat()