"""Gradio front end for a multimodal (text + image) chatbot using the OpenAI "o1" model."""

import base64
import io

import gradio as gr
import openai
from PIL import Image

# Model and token budget used for every completion request.
MODEL_NAME = "o1"
MAX_COMPLETION_TOKENS = 2000

# Markdown shown under the page title. Built from explicit lines so the
# heading and the bullet list render as separate Markdown elements.
_DESCRIPTION_MD = "\n".join(
    [
        "### Description:",
        "This is a multimodal chatbot that can handle both text and image inputs.",
        "- You can ask questions or provide text, and the assistant will respond.",
        "- You can also upload an image, and the assistant will process it and "
        "answer questions about the image.",
        "- Enter your OpenAI API key to start interacting with the model.",
        "- You can use the 'Clear History' button to remove the conversation history.",
    ]
)


def get_base64_string_from_image(pil_image):
    """Return *pil_image* encoded as a base64 string of its PNG bytes.

    Args:
        pil_image: A PIL image (anything exposing ``save(fp, format=...)``).

    Returns:
        The PNG serialization of the image, base64-encoded and decoded to a
        UTF-8 ``str`` (no ``data:`` URL prefix).
    """
    buffered = io.BytesIO()
    pil_image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def _build_user_content(input_text, image):
    """Build the list of content parts (text and/or image) for one user message."""
    parts = []
    if input_text and input_text.strip():
        parts.append({"type": "text", "text": input_text})
    if image is not None:
        encoded = get_base64_string_from_image(image)
        parts.append(
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{encoded}"},
            }
        )
    return parts


def _request_completion(messages, openai_api_key, reasoning_effort):
    """Send *messages* to the chat completions endpoint and return the reply text."""
    params = {
        "model": MODEL_NAME,
        "messages": messages,
        "reasoning_effort": reasoning_effort,
        "max_completion_tokens": MAX_COMPLETION_TOKENS,
    }
    # openai>=1.0 removed ``openai.ChatCompletion``; prefer the client API when
    # it exists and fall back to the legacy module-level call otherwise.
    if hasattr(openai, "OpenAI"):
        client = openai.OpenAI(api_key=openai_api_key)
        response = client.chat.completions.create(**params)
        return response.choices[0].message.content

    openai.api_key = openai_api_key
    response = openai.ChatCompletion.create(**params)
    return response["choices"][0]["message"]["content"]


def generate_response(input_text, image, openai_api_key, reasoning_effort="medium"):
    """Ask the model about the given text and/or image and return its reply.

    Args:
        input_text: The user's question; may be empty when an image is given.
        image: An uploaded PIL image, or ``None`` when no image was provided.
        openai_api_key: The API key used for this request.
        reasoning_effort: Value forwarded as the ``reasoning_effort`` request
            parameter (the UI offers "low", "medium" and "high").

    Returns:
        The assistant's reply text, or a string starting with ``"Error"`` when
        the key or the input is missing or the API call fails.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    content = _build_user_content(input_text, image)
    if not content:
        # Nothing to send: avoid a pointless (and failing) API round trip.
        return "Error: Please enter a question or upload an image."

    messages = [{"role": "user", "content": content}]

    try:
        return _request_completion(messages, openai_api_key, reasoning_effort)
    except Exception as e:  # UI boundary: report the failure in the chat window.
        return f"Error calling OpenAI API: {str(e)}"


def chatbot(input_text, image, openai_api_key, reasoning_effort, history=None):
    """Gradio callback: get a reply and append the exchange to the history.

    Args:
        input_text: The text typed by the user.
        image: The uploaded PIL image, or ``None``.
        openai_api_key: The API key entered in the UI.
        reasoning_effort: The reasoning effort selected in the UI.
        history: The current list of ``(user, assistant)`` pairs, or ``None``.

    Returns:
        A tuple ``("", history)``: an empty string to clear the text box and
        the updated history for the chat display.
    """
    # Work on a fresh list so neither a shared default nor the caller's list
    # is mutated between calls.
    history = list(history) if history else []
    response = generate_response(input_text, image, openai_api_key, reasoning_effort)
    history.append((f"User: {input_text}", f"Assistant: {response}"))
    return "", history


def clear_history():
    """Return an empty text value and an empty chat history."""
    return "", []


def create_interface():
    """Build and return the Gradio Blocks app for the chatbot."""
    with gr.Blocks() as demo:
        gr.Markdown("# Multimodal Chatbot (Text + Image)")
        gr.Markdown(_DESCRIPTION_MD)

        with gr.Row():
            openai_api_key = gr.Textbox(
                label="Enter OpenAI API Key",
                type="password",
                placeholder="sk-...",
                interactive=True,
            )

        with gr.Row():
            image_input = gr.Image(label="Upload an Image", type="pil")
            input_text = gr.Textbox(
                label="Enter Text Question",
                placeholder="Ask a question or provide text",
                lines=2,
            )

        with gr.Row():
            reasoning_effort = gr.Dropdown(
                label="Reasoning Effort",
                choices=["low", "medium", "high"],
                value="medium",
            )
            submit_btn = gr.Button("Send")
            clear_btn = gr.Button("Clear History")

        chat_history = gr.Chatbot()

        submit_btn.click(
            fn=chatbot,
            inputs=[input_text, image_input, openai_api_key, reasoning_effort, chat_history],
            outputs=[input_text, chat_history],
        )
        # clear_history returns (text, history), so the outputs must be the
        # text box followed by the chat display.
        clear_btn.click(
            fn=clear_history,
            inputs=[],
            outputs=[input_text, chat_history],
        )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch()