import gradio as gr
import openai
from PIL import Image
import io
import base64


def generate_response(prompt, openai_api_key, image_info="", reasoning_effort="medium"):
    """Send *prompt* (plus optional image context) to the OpenAI "o1" model.

    Parameters:
        prompt: the user's text question.
        openai_api_key: API key; an error string is returned if it is missing.
        image_info: optional extra context (here, a base64 PNG string) that is
            appended to the prompt as plain text.
        reasoning_effort: "low" / "medium" / "high", forwarded to the API.

    Returns:
        The assistant's reply text, or a human-readable error string on failure.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    openai.api_key = openai_api_key

    # Combine the text prompt with the optional image info.
    full_prompt = prompt
    if image_info:
        full_prompt += f"\n\nAdditional context about the image: {image_info}"

    try:
        # Bug fix: the "o1" reasoning model rejects `temperature` and the
        # legacy `max_tokens` parameter (it requires `max_completion_tokens`),
        # and early o1 releases also reject "system" role messages — so the
        # original call failed unconditionally. NOTE(review): this still uses
        # the pre-1.0 `openai.ChatCompletion` interface, as the original did.
        response = openai.ChatCompletion.create(
            model="o1",
            messages=[
                {"role": "user", "content": full_prompt},
            ],
            max_completion_tokens=300,
            reasoning_effort=reasoning_effort,  # include reasoning effort in the request
        )
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        return f"Error calling OpenAI API: {str(e)}"


def get_base64_string_from_image(pil_image):
    """Encode a PIL image as a base64 string of its PNG bytes."""
    buffered = io.BytesIO()
    pil_image.save(buffered, format="PNG")
    img_bytes = buffered.getvalue()
    base64_str = base64.b64encode(img_bytes).decode("utf-8")
    return base64_str


def chatbot(input_text, image, openai_api_key, reasoning_effort, history=None):
    """Gradio callback: answer *input_text*, optionally with image context.

    Returns ("", updated_history) so the input textbox is cleared after send.
    """
    # Bug fix: the original used a mutable default (history=[]), which is
    # shared across every call and leaks conversation state between sessions.
    if history is None:
        history = []

    image_info = ""
    # Bug fix: gr.Image(type="pil") already delivers a PIL.Image, so the
    # original Image.open(image) raised on every upload and the error string
    # was silently sent to the model as "image info".
    if image is not None:
        try:
            image_info = get_base64_string_from_image(image)
        except Exception as e:
            image_info = f"Error reading image: {e}"

    # Combine user input with image info (if any).
    response = generate_response(input_text, openai_api_key, image_info, reasoning_effort)

    # Append the exchange to the history shown in the Chatbot component.
    history.append((f"User: {input_text}", f"Assistant: {response}"))
    return "", history


def clear_history():
    """Reset the input textbox and the chat history."""
    return "", []


def create_interface():
    """Build and return the Gradio Blocks UI for the chatbot."""
    with gr.Blocks() as demo:
        # Bug fix: the original heading literal contained a raw line break
        # inside a single-quoted string, which is a SyntaxError.
        gr.Markdown("# Multimodal Chatbot (Text + Image)")

        with gr.Row():
            openai_api_key = gr.Textbox(
                label="Enter OpenAI API Key",
                type="password",
                placeholder="sk-...",
                interactive=True,
            )

        with gr.Row():
            image_input = gr.Image(label="Upload an Image", type="pil")
            input_text = gr.Textbox(
                label="Enter Text Question",
                placeholder="Ask a question or provide text",
                lines=2,
            )

        with gr.Row():
            # Bug fix: gr.Dropdown has no `description` keyword (TypeError at
            # build time); the helper-text parameter is `info`.
            reasoning_effort = gr.Dropdown(
                label="Reasoning Effort",
                choices=["low", "medium", "high"],
                value="medium",
                info="Select the reasoning effort for generating the response.",
            )

        submit_btn = gr.Button("Send")
        clear_btn = gr.Button("Clear History")
        chat_history = gr.Chatbot()

        # Button interactions.
        submit_btn.click(
            fn=chatbot,
            inputs=[input_text, image_input, openai_api_key, reasoning_effort, chat_history],
            outputs=[input_text, chat_history],
        )
        # Bug fix: clear_history returns ("", []) but the original wired BOTH
        # outputs to chat_history, so the textbox was never cleared and the
        # Chatbot component was briefly handed a plain string.
        clear_btn.click(fn=clear_history, inputs=[], outputs=[input_text, chat_history])

    return demo


# Run the interface.
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()