import gradio as gr
from openai import OpenAI
import base64
import io


# Send a request to the OpenAI API with text and/or image input.
def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
    if not openai_api_key:
        return "Error: No API key provided."

    # The reasoning_effort and max_completion_tokens parameters require the
    # 1.x client API (client.chat.completions.create), not the legacy
    # openai.ChatCompletion.create interface.
    client = OpenAI(api_key=openai_api_key)

    # Build the user message content. "o1" accepts image parts; "o3-mini"
    # is text-only. Keep the text question even when an image is attached,
    # so the model can answer questions *about* the image.
    content = []
    if input_text:
        content.append({"type": "text", "text": input_text})
    if model_choice == "o1" and image is not None:
        # Encode the uploaded image as a base64 data URL.
        base64_image = get_base64_string_from_image(image)
        content.append({
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{base64_image}"},
        })
    if not content:
        return "Error: Please provide a question or an image."

    messages = [{"role": "user", "content": content}]

    try:
        # Call the OpenAI API with the selected reasoning model.
        response = client.chat.completions.create(
            model=model_choice,                 # "o1" or "o3-mini"
            messages=messages,
            reasoning_effort=reasoning_effort,  # "low", "medium", or "high"
            max_completion_tokens=2000,         # cap on reasoning + output tokens
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error calling OpenAI API: {str(e)}"


# Convert an uploaded PIL image to a base64 string.
def get_base64_string_from_image(pil_image):
    buffered = io.BytesIO()
    pil_image.save(buffered, format="PNG")
    img_bytes = buffered.getvalue()
    return base64.b64encode(img_bytes).decode("utf-8")


# Callback used by the Gradio interface.
def chatbot(input_text, image, openai_api_key, reasoning_effort, model_choice, history=None):
    # Avoid a mutable default argument; Gradio passes the current history in.
    history = history or []
    response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)
    # Append the exchange to the history.
    history.append((f"User: {input_text}", f"Assistant: {response}"))
    return "", history


# Clear the input box and the chat history.
def clear_history():
    return "", []


# Gradio interface setup
def create_interface():
    with gr.Blocks() as demo:
        gr.Markdown("# Multimodal Chatbot (Text + Image)")

        # Description shown under the title
        gr.Markdown("""
        ### Description:
        This is a multimodal chatbot that can handle both text and image inputs.
        - Ask a question or provide text, and the assistant will respond.
        - Upload an image, and the assistant will process it and answer questions about it.
        - Enter your OpenAI API key to start interacting with the model.
        - Use the 'Clear History' button to remove the conversation history.
        - "o1" is for image chat and "o3-mini" is for text-only chat.

        ### Reasoning Effort:
        The reasoning effort controls how complex or detailed the assistant's answers should be.
        - **Low**: Quick, concise answers with minimal reasoning.
        - **Medium**: Balanced responses with a reasonable level of detail.
        - **High**: More detailed, analytical responses that require deeper reasoning.
        """)

        with gr.Row():
            openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password",
                                        placeholder="sk-...", interactive=True)

        with gr.Row():
            image_input = gr.Image(label="Upload an Image", type="pil")  # image upload input
            input_text = gr.Textbox(label="Enter Text Question",
                                    placeholder="Ask a question or provide text", lines=2)

        with gr.Row():
            reasoning_effort = gr.Dropdown(
                label="Reasoning Effort",
                choices=["low", "medium", "high"],
                value="medium"
            )
            model_choice = gr.Dropdown(
                label="Select Model",
                choices=["o1", "o3-mini"],
                value="o1"  # default to "o1" for image-related tasks
            )

        submit_btn = gr.Button("Send")
        clear_btn = gr.Button("Clear History")
        chat_history = gr.Chatbot()

        # Button interactions
        submit_btn.click(fn=chatbot,
                         inputs=[input_text, image_input, openai_api_key,
                                 reasoning_effort, model_choice, chat_history],
                         outputs=[input_text, chat_history])
        # clear_history returns ("", []), so clear both the textbox and the chat
        # (the original wired both outputs to chat_history, which fed "" into the Chatbot).
        clear_btn.click(fn=clear_history, inputs=[], outputs=[input_text, chat_history])

    return demo


# Run the interface
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()