import base64
import io

import gradio as gr
from openai import OpenAI
from PIL import Image
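
# Note: this script assumes the openai>=1.0 Python SDK (which provides the
# OpenAI client class); the o1 model accepts the reasoning_effort and
# max_completion_tokens parameters used below.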


def generate_response(input_text, image, openai_api_key, reasoning_effort="medium"):
    """Send the text question (and optional image) to the o1 model and return its reply."""
    if not openai_api_key:
        return "Error: No API key provided."

    client = OpenAI(api_key=openai_api_key)

    # Build the user message: always include the text question, and attach the
    # image as a base64 data URL when one was uploaded.
    content = [{"type": "text", "text": input_text}]
    if image is not None:
        image_b64 = get_base64_string_from_image(image)
        content.append({
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{image_b64}"},
        })

    messages = [{"role": "user", "content": content}]

    try:
        response = client.chat.completions.create(
            model="o1",
            messages=messages,
            reasoning_effort=reasoning_effort,
            max_completion_tokens=2000,
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error calling OpenAI API: {e}"


def get_base64_string_from_image(pil_image):
    """Encode a PIL image as a base64-encoded PNG string."""
    buffered = io.BytesIO()
    pil_image.save(buffered, format="PNG")
    img_bytes = buffered.getvalue()
    return base64.b64encode(img_bytes).decode("utf-8")


def chatbot(input_text, image, openai_api_key, reasoning_effort, history=None):
    """Handle a submit event: query the model and append the exchange to the chat history."""
    # Avoid a mutable default argument; Gradio passes the current Chatbot value here.
    if history is None:
        history = []
    response = generate_response(input_text, image, openai_api_key, reasoning_effort)
    history.append((f"User: {input_text}", f"Assistant: {response}"))
    # Clear the textbox and return the updated history.
    return "", history


def clear_history():
    """Reset the textbox and the conversation history."""
    return "", []


def create_interface():
    """Build the Gradio Blocks UI."""
    with gr.Blocks() as demo:
        gr.Markdown("# Multimodal Chatbot (Text + Image)")

        gr.Markdown("""
        ### Description:
        This is a multimodal chatbot that can handle both text and image inputs.
        - You can ask questions or provide text, and the assistant will respond.
        - You can also upload an image, and the assistant will answer questions about it.
        - Enter your OpenAI API key to start interacting with the model.
        - Use the 'Clear History' button to remove the conversation history.
        """)

        with gr.Row():
            openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)

        with gr.Row():
            image_input = gr.Image(label="Upload an Image", type="pil")
            input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)

        with gr.Row():
            reasoning_effort = gr.Dropdown(
                label="Reasoning Effort",
                choices=["low", "medium", "high"],
                value="medium",
            )
            submit_btn = gr.Button("Send")
            clear_btn = gr.Button("Clear History")

        chat_history = gr.Chatbot()

        # Wire the buttons: Send routes the inputs through chatbot(); Clear History
        # resets both the textbox and the conversation.
        submit_btn.click(
            fn=chatbot,
            inputs=[input_text, image_input, openai_api_key, reasoning_effort, chat_history],
            outputs=[input_text, chat_history],
        )
        clear_btn.click(fn=clear_history, inputs=[], outputs=[input_text, chat_history])

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
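    # Optional: demo.launch(share=True) would also expose a temporary public
    # share link (a standard Gradio launch option).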