shukdevdatta123's picture
Update app.py
eb511aa verified
raw
history blame
3.82 kB
import gradio as gr
import openai
import base64
from PIL import Image
import io
def generate_response(input_text, image, openai_api_key, reasoning_effort="medium"):
    """Send the user's text and/or image to the OpenAI "o1" model and return the reply.

    Args:
        input_text: The user's question or prompt. May be empty when an image
            is supplied.
        image: An image object accepted by ``get_base64_string_from_image``,
            or ``None`` when no image was uploaded.
        openai_api_key: API key used for this request.
        reasoning_effort: Forwarded unchanged as the ``reasoning_effort``
            request parameter.

    Returns:
        The assistant's reply text on success, otherwise a human-readable
        string starting with ``"Error"``.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    has_text = bool(input_text and input_text.strip())
    has_image = image is not None
    if not (has_text or has_image):
        # Nothing to send; avoid a pointless (and failing) API round trip.
        return "Error: Please provide a text question or an image."

    openai.api_key = openai_api_key

    # Build a multimodal message: the text part and the image part are sent
    # side by side so a question about an uploaded image is not lost.
    content = []
    if has_text:
        content.append({"type": "text", "text": input_text})
    if has_image:
        image_info = get_base64_string_from_image(image)
        content.append(
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{image_info}"},
            }
        )
    messages = [{"role": "user", "content": content}]

    try:
        # NOTE(review): `openai.ChatCompletion` is the pre-1.0 SDK interface;
        # confirm the pinned openai package version still provides it.
        response = openai.ChatCompletion.create(
            model="o1",
            messages=messages,
            reasoning_effort=reasoning_effort,
            max_completion_tokens=2000,  # Cap the length of the reply
        )
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        # UI boundary: surface the failure as chat text instead of crashing.
        return f"Error calling OpenAI API: {str(e)}"
def get_base64_string_from_image(pil_image):
    """Return *pil_image* saved as PNG and base64-encoded into a UTF-8 string."""
    with io.BytesIO() as png_buffer:
        # Serialize the image into an in-memory PNG, then encode those bytes.
        pil_image.save(png_buffer, format="PNG")
        encoded = base64.b64encode(png_buffer.getvalue())
    return encoded.decode("utf-8")
def chatbot(input_text, image, openai_api_key, reasoning_effort, history=None):
    """Gradio callback: get a model reply and append the exchange to the chat history.

    Args:
        input_text: Text entered by the user.
        image: Uploaded image, or ``None``.
        openai_api_key: API key forwarded to ``generate_response``.
        reasoning_effort: Reasoning effort forwarded to ``generate_response``.
        history: Existing list of ``(user, assistant)`` message pairs. A fresh
            list is created when omitted or ``None``.

    Returns:
        A tuple ``("", history)``: an empty string and the updated history.
    """
    # A mutable default ([]) would be shared by every call that omits
    # `history`, leaking one conversation into the next.
    if history is None:
        history = []

    response = generate_response(input_text, image, openai_api_key, reasoning_effort)

    history.append((f"User: {input_text}", f"Assistant: {response}"))
    return "", history
def clear_history():
    """Return an empty string and an empty list, used to reset the chat UI."""
    blank_text, blank_history = "", []
    return blank_text, blank_history
def create_interface():
    """Build and return the Gradio Blocks UI for the multimodal chatbot.

    The layout holds an API-key field, an image upload next to a text box,
    a reasoning-effort dropdown with Send / Clear History buttons, and the
    chat display. The returned object is not launched here.
    """
    # Assembled from pieces so the rendered Markdown text does not pick up
    # the indentation of this function body.
    description = (
        "\n"
        "### Description:\n"
        "This is a multimodal chatbot that can handle both text and image inputs.\n"
        "- You can ask questions or provide text, and the assistant will respond.\n"
        "- You can also upload an image, and the assistant will process it and answer questions about the image.\n"
        "- Enter your OpenAI API key to start interacting with the model.\n"
        "- You can use the 'Clear History' button to remove the conversation history.\n"
    )

    with gr.Blocks() as demo:
        gr.Markdown("# Multimodal Chatbot (Text + Image)")
        gr.Markdown(description)

        with gr.Row():
            openai_api_key = gr.Textbox(
                label="Enter OpenAI API Key",
                type="password",
                placeholder="sk-...",
                interactive=True,
            )

        with gr.Row():
            image_input = gr.Image(label="Upload an Image", type="pil")
            input_text = gr.Textbox(
                label="Enter Text Question",
                placeholder="Ask a question or provide text",
                lines=2,
            )

        with gr.Row():
            reasoning_effort = gr.Dropdown(
                label="Reasoning Effort",
                choices=["low", "medium", "high"],
                value="medium",
            )
            submit_btn = gr.Button("Send")
            clear_btn = gr.Button("Clear History")

        chat_history = gr.Chatbot()

        # Send: the callback returns ("", history), which clears the text box
        # and refreshes the chat display.
        submit_btn.click(
            fn=chatbot,
            inputs=[input_text, image_input, openai_api_key, reasoning_effort, chat_history],
            outputs=[input_text, chat_history],
        )
        # Clear: clear_history returns ("", []), so the first value must go to
        # the text box and the second to the chat display. Listing the chat
        # display twice would send the empty string to the Chatbot.
        clear_btn.click(
            fn=clear_history,
            inputs=[],
            outputs=[input_text, chat_history],
        )

    return demo
# Script entry point: build the UI and start the Gradio server only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()