|
import gradio as gr |
|
import openai |
|
import base64 |
|
from PIL import Image |
|
import io |
|
|
|
# Function to send the request to OpenAI API with an image or text input
|
def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
    """Send a text and/or image prompt to an OpenAI model and return the reply.

    Args:
        input_text: The user's question. May be empty when an image is given.
        image: A PIL image to attach to the prompt, or None for text only.
        openai_api_key: API key used for this request.
        reasoning_effort: Forwarded unchanged as the request's
            ``reasoning_effort`` value.
        model_choice: "o1" (text and images) or "o3-mini" (text only).

    Returns:
        The assistant's reply text on success. On any failure (missing key,
        unsupported model, invalid input, or an API error) a human-readable
        string starting with "Error" is returned instead of raising, so the
        caller can show it directly in the chat.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    # Validate everything locally before touching the network.
    if model_choice not in ("o1", "o3-mini"):
        return f"Error: Unsupported model '{model_choice}'. Choose 'o1' or 'o3-mini'."

    has_image = image is not None
    if has_image and model_choice != "o1":
        # Only "o1" is used for image chat; sending base64 as plain text
        # would just feed the model meaningless characters.
        return f"Error: Model '{model_choice}' does not accept images. Select 'o1' or remove the image."

    # Build the user message from whichever parts were supplied, so that a
    # question asked alongside an image is not dropped and plain text is
    # never sent as an image URL.
    content = []
    if input_text:
        content.append({"type": "text", "text": input_text})
    if has_image:
        image_info = get_base64_string_from_image(image)
        content.append(
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_info}"}}
        )
    if not content:
        return "Error: Please enter a question or upload an image."

    request = {
        "model": model_choice,
        "messages": [{"role": "user", "content": content}],
        "reasoning_effort": reasoning_effort,
        "max_completion_tokens": 2000,  # Limit response tokens to 2000
    }

    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0: ChatCompletion was removed; use a per-request
            # client so the key is not stored in module-global state.
            client = openai.OpenAI(api_key=openai_api_key)
            response = client.chat.completions.create(**request)
            return response.choices[0].message.content

        # Legacy openai<1.0 interface.
        openai.api_key = openai_api_key
        response = openai.ChatCompletion.create(**request)
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        # UI boundary: report the failure in the chat instead of crashing.
        return f"Error calling OpenAI API: {str(e)}"
|
|
|
# Function to convert an uploaded image to a base64 string
|
def get_base64_string_from_image(pil_image):
    """Return the image's PNG encoding as a base64 text string.

    Args:
        pil_image: An object exposing ``save(fp, format=...)``, such as a
            PIL image.

    Returns:
        The base64 encoding of the PNG bytes, decoded as UTF-8 text
        (without any ``data:`` URL prefix).
    """
    # Serialize into memory rather than to a file on disk.
    with io.BytesIO() as png_buffer:
        pil_image.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()
    return base64.b64encode(png_bytes).decode("utf-8")
|
|
|
# The function that will be used by Gradio interface
|
def chatbot(input_text, image, openai_api_key, reasoning_effort, model_choice, history=None):
    """Handle one chat turn: query the model and record the exchange.

    Args:
        input_text: The user's question.
        image: Optional image passed through to ``generate_response``.
        openai_api_key: API key passed through to ``generate_response``.
        reasoning_effort: Passed through to ``generate_response``.
        model_choice: Passed through to ``generate_response``.
        history: List of (user, assistant) string pairs from earlier turns.
            When omitted or None, a new list is started.

    Returns:
        A tuple ``("", history)``: an empty string followed by the history
        list with the new exchange appended.
    """
    # A fresh list per call; a mutable default would be shared by every
    # call that omits the argument.
    if history is None:
        history = []

    response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)

    # Append the response to the history
    history.append((f"User: {input_text}", f"Assistant: {response}"))

    return "", history
|
|
|
# Function to clear the chat history
|
def clear_history():
    """Return the reset values: an empty string and a new empty list."""
    blank_text, empty_history = "", []
    return blank_text, empty_history
|
|
|
# Gradio interface setup
|
def create_interface():
    """Build the Gradio Blocks UI for the chatbot and wire up its buttons.

    Returns:
        The ``gr.Blocks`` app object; the caller starts it with ``launch()``.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Multimodal Chatbot (Text + Image)")

        # Add a description after the title
        gr.Markdown("""
        ### Description:
        This is a multimodal chatbot that can handle both text and image inputs.
        - You can ask questions or provide text, and the assistant will respond.
        - You can also upload an image, and the assistant will process it and answer questions about the image.
        - Enter your OpenAI API key to start interacting with the model.
        - You can use the 'Clear History' button to remove the conversation history.
        - "o1" is for image chat and "o3-mini" is for text chat.
        ### Reasoning Effort:
        The reasoning effort controls how complex or detailed the assistant's answers should be.
        - **Low**: Provides quick, concise answers with minimal reasoning or details.
        - **Medium**: Offers a balanced response with a reasonable level of detail and thought.
        - **High**: Produces more detailed, analytical, or thoughtful responses, requiring deeper reasoning.
        """)

        with gr.Row():
            openai_api_key = gr.Textbox(
                label="Enter OpenAI API Key",
                type="password",
                placeholder="sk-...",
                interactive=True,
            )

        with gr.Row():
            image_input = gr.Image(label="Upload an Image", type="pil")  # Image upload input
            input_text = gr.Textbox(
                label="Enter Text Question",
                placeholder="Ask a question or provide text",
                lines=2,
            )

        with gr.Row():
            reasoning_effort = gr.Dropdown(
                label="Reasoning Effort",
                choices=["low", "medium", "high"],
                value="medium",
            )
            model_choice = gr.Dropdown(
                label="Select Model",
                choices=["o1", "o3-mini"],
                value="o1",  # Default to 'o1' for image-related tasks
            )
            submit_btn = gr.Button("Send")
            clear_btn = gr.Button("Clear History")

        chat_history = gr.Chatbot()

        # Button interactions
        submit_btn.click(
            fn=chatbot,
            inputs=[input_text, image_input, openai_api_key, reasoning_effort, model_choice, chat_history],
            outputs=[input_text, chat_history],
        )
        # clear_history returns ("", []): the empty string resets the text
        # box and the empty list resets the chat, mirroring the submit
        # handler's outputs. Listing chat_history twice would route the
        # empty string to the Chatbot component.
        clear_btn.click(fn=clear_history, inputs=[], outputs=[input_text, chat_history])

    return demo
|
|
|
# Run the interface
|
if __name__ == "__main__":
    # Script entry point: build the Blocks app, then start serving it.
    demo = create_interface()
    demo.launch()