# app.py — multimodal (text + image) Gradio chatbot backed by the OpenAI API.
# NOTE: the original file began with Hugging Face web-page chrome
# ("raw / history / blame / 3.56 kB"), which is not Python and has been
# replaced by this comment header so the file parses.
import gradio as gr
import openai
from PIL import Image
import io
import base64
# Function to send the request to OpenAI API
def generate_response(prompt, openai_api_key, image_info="", reasoning_effort="medium"):
    """Send *prompt* (plus optional image context) to the OpenAI "o1" model.

    Args:
        prompt: The user's text question.
        openai_api_key: API key supplied by the user; when empty/None an
            error string is returned instead of calling the API.
        image_info: Optional extra context (here, a base64 image string).
        reasoning_effort: "low" / "medium" / "high", forwarded to the o1 model.

    Returns:
        The assistant's reply text, or a human-readable error string.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    openai.api_key = openai_api_key

    # Combine the text prompt with the optional image context.
    full_prompt = prompt
    if image_info:
        full_prompt += f"\n\nAdditional context about the image: {image_info}"

    try:
        # Call OpenAI API with the specified model ("o1").
        response = openai.ChatCompletion.create(
            model="o1",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": full_prompt},
            ],
            # o1-family reasoning models do not accept `temperature` and
            # require `max_completion_tokens` rather than `max_tokens`;
            # the original kwargs made the API reject the request.
            max_completion_tokens=300,
            reasoning_effort=reasoning_effort,
        )
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        return f"Error calling OpenAI API: {str(e)}"
# Function to convert an uploaded image to a base64 string
def get_base64_string_from_image(pil_image):
    """Encode *pil_image* as PNG and return its base64 text representation."""
    with io.BytesIO() as png_buffer:
        # Serialize the image to an in-memory PNG, then base64-encode it.
        pil_image.save(png_buffer, format="PNG")
        raw_png = png_buffer.getvalue()
    return base64.b64encode(raw_png).decode("utf-8")
# The function that will be used by Gradio interface
def chatbot(input_text, image, openai_api_key, reasoning_effort, history=None):
    """Handle one chat turn: encode the optional image, query the model,
    and append the (user, assistant) exchange to the chat history.

    Returns:
        ("", history) — the empty string clears the input textbox and
        the updated history refreshes the Chatbot component.
    """
    # `history=None` avoids the shared-mutable-default pitfall: the
    # original `history=[]` default was one list reused across all calls.
    if history is None:
        history = []

    image_info = ""
    # If an image was uploaded, convert it to base64 for reference.
    if image is not None:
        try:
            # Gradio's Image component with type="pil" already delivers a
            # PIL Image; Image.open() is only needed for a path/file-like
            # value (the original always called open() and thus failed on
            # PIL inputs, silently storing an error string instead).
            if not isinstance(image, Image.Image):
                image = Image.open(image)
            image_info = get_base64_string_from_image(image)
        except Exception as e:
            image_info = f"Error reading image: {e}"

    # Combine user input with image info (if any).
    response = generate_response(input_text, openai_api_key, image_info, reasoning_effort)

    # Append the exchange to the history shown in the UI.
    history.append((f"User: {input_text}", f"Assistant: {response}"))
    return "", history
# Function to clear the chat history
def clear_history():
    """Return values that blank the input box and empty the chat log."""
    cleared_input, cleared_log = "", []
    return cleared_input, cleared_log
# Gradio interface setup
def create_interface():
    """Build and return the Gradio Blocks UI for the multimodal chatbot."""
    with gr.Blocks() as demo:
        gr.Markdown("# Multimodal Chatbot (Text + Image)")
        with gr.Row():
            openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)
        with gr.Row():
            image_input = gr.Image(label="Upload an Image", type="pil")
            input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
        with gr.Row():
            reasoning_effort = gr.Dropdown(
                label="Reasoning Effort",
                choices=["low", "medium", "high"],
                value="medium",
                # gr.Dropdown takes `info=` for helper text; the original
                # passed `description=`, which is not a valid keyword and
                # raises a TypeError at build time.
                info="Select the reasoning effort for generating the response.",
            )
        submit_btn = gr.Button("Send")
        clear_btn = gr.Button("Clear History")
        chat_history = gr.Chatbot()

        # Button interactions
        submit_btn.click(fn=chatbot, inputs=[input_text, image_input, openai_api_key, reasoning_effort, chat_history], outputs=[input_text, chat_history])
        # clear_history returns ("", []): the first value clears the input
        # box, the second empties the chat log. The original wired
        # chat_history twice, so the textbox was never cleared and the
        # Chatbot received a bare string.
        clear_btn.click(fn=clear_history, inputs=[], outputs=[input_text, chat_history])
    return demo
# Run the interface when executed as a script.
if __name__ == "__main__":
    create_interface().launch()