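"""Gradio demo: a multimodal chatbot that sends text or image questions to
OpenAI reasoning models ("o1" for image chat, "o3-mini" for text-only chat)."""
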
import gradio as gr
import openai
import base64
from PIL import Image
import io

# Send a request to the OpenAI API with an image or text input
def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
    if not openai_api_key:
        return "Error: No API key provided."

    client = openai.OpenAI(api_key=openai_api_key)

    # If an image was uploaded, encode it as a base64 data URL for the API
    if image is not None:
        image_data_url = f"data:image/png;base64,{get_base64_string_from_image(image)}"

    # Build the message payload: "o1" accepts image input, "o3-mini" is text-only
    if model_choice == "o1" and image is not None:
        content = [{"type": "image_url", "image_url": {"url": image_data_url}}]
        # Keep the user's question alongside the image if one was entered
        if input_text:
            content.insert(0, {"type": "text", "text": input_text})
        messages = [{"role": "user", "content": content}]
    else:
        messages = [
            {"role": "user", "content": [{"type": "text", "text": input_text}]}
        ]

    try:
        # Call the OpenAI API with the selected model
        response = client.chat.completions.create(
            model=model_choice,                 # "o1" or "o3-mini"
            messages=messages,
            reasoning_effort=reasoning_effort,  # "low", "medium", or "high"
            max_completion_tokens=2000,         # limit response tokens to 2000
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error calling OpenAI API: {e}"

# Convert an uploaded PIL image to a base64 string (without the data-URL
# prefix, which the caller prepends)
def get_base64_string_from_image(pil_image):
    # Serialize the PIL Image to PNG bytes, then base64-encode them
    buffered = io.BytesIO()
    pil_image.save(buffered, format="PNG")
    img_bytes = buffered.getvalue()
    base64_str = base64.b64encode(img_bytes).decode("utf-8")
    return base64_str

# Callback used by the Gradio interface
def chatbot(input_text, image, openai_api_key, reasoning_effort, model_choice, history=None):
    # Avoid a mutable default argument; Gradio passes the current history in anyway
    history = history or []
    response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
    # Append the latest exchange to the history and clear the text box
    history.append((f"User: {input_text}", f"Assistant: {response}"))
    return "", history

# Clear the chat history and the text box
def clear_history():
    return "", []

# Gradio interface setup
def create_interface():
    with gr.Blocks() as demo:
        gr.Markdown("# Multimodal Chatbot (Text + Image)")

        # Add a description after the title
        gr.Markdown("""
        ### Description:
        This is a multimodal chatbot that can handle both text and image inputs.
        - Ask a question or provide text, and the assistant will respond.
        - You can also upload an image, and the assistant will answer questions about it.
        - Enter your OpenAI API key to start interacting with the model.
        - Use the 'Clear History' button to remove the conversation history.
        - "o1" is for image chat and "o3-mini" is for text chat.

        ### Reasoning Effort:
        The reasoning effort controls how complex or detailed the assistant's answers should be.
        - **Low**: Quick, concise answers with minimal reasoning or detail.
        - **Medium**: A balanced response with a reasonable level of detail and thought.
        - **High**: More detailed, analytical, or thoughtful responses, requiring deeper reasoning.
        """)
        with gr.Row():
            openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)

        with gr.Row():
            image_input = gr.Image(label="Upload an Image", type="pil")  # Image upload input
            input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)

        with gr.Row():
            reasoning_effort = gr.Dropdown(
                label="Reasoning Effort",
                choices=["low", "medium", "high"],
                value="medium"
            )
            model_choice = gr.Dropdown(
                label="Select Model",
                choices=["o1", "o3-mini"],
                value="o1"  # Default to 'o1' for image-related tasks
            )

        submit_btn = gr.Button("Send")
        clear_btn = gr.Button("Clear History")
        chat_history = gr.Chatbot()

        # Button interactions: send a message, or clear both the text box
        # and the chat history
        submit_btn.click(fn=chatbot, inputs=[input_text, image_input, openai_api_key, reasoning_effort, model_choice, chat_history], outputs=[input_text, chat_history])
        clear_btn.click(fn=clear_history, inputs=[], outputs=[input_text, chat_history])

    return demo

# Run the interface
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
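    # Optionally pass share=True to demo.launch() for a temporary public URL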