import base64
import io

import gradio as gr
from openai import OpenAI
from PIL import Image
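
# Note: this script assumes the openai>=1.0 Python SDK (which provides the
# OpenAI client class); the o1 model accepts the reasoning_effort and
# max_completion_tokens parameters used below.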


def generate_response(input_text, image, openai_api_key, reasoning_effort="medium"):
    """Send the text question (and optional image) to the o1 model and return its reply."""
    if not openai_api_key:
        return "Error: No API key provided."

    client = OpenAI(api_key=openai_api_key)

    # Build the user message: always include the text question, and attach the
    # image as a base64 data URL when one was uploaded.
    content = [{"type": "text", "text": input_text}]
    if image is not None:
        image_b64 = get_base64_string_from_image(image)
        content.append({
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{image_b64}"},
        })

    messages = [{"role": "user", "content": content}]

    try:
        response = client.chat.completions.create(
            model="o1",
            messages=messages,
            reasoning_effort=reasoning_effort,
            max_completion_tokens=2000,
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error calling OpenAI API: {e}"


def get_base64_string_from_image(pil_image):
    """Encode a PIL image as a base64-encoded PNG string."""
    buffered = io.BytesIO()
    pil_image.save(buffered, format="PNG")
    img_bytes = buffered.getvalue()
    return base64.b64encode(img_bytes).decode("utf-8")


def chatbot(input_text, image, openai_api_key, reasoning_effort, history=None):
    """Handle a submit event: query the model and append the exchange to the chat history."""
    # Avoid a mutable default argument; Gradio passes the current Chatbot value here.
    if history is None:
        history = []
    response = generate_response(input_text, image, openai_api_key, reasoning_effort)
    history.append((f"User: {input_text}", f"Assistant: {response}"))
    # Clear the textbox and return the updated history.
    return "", history


def clear_history():
    """Reset the textbox and the conversation history."""
    return "", []


def create_interface():
    """Build the Gradio Blocks UI."""
    with gr.Blocks() as demo:
        gr.Markdown("# Multimodal Chatbot (Text + Image)")

        gr.Markdown("""
        ### Description:
        This is a multimodal chatbot that can handle both text and image inputs.
        - You can ask questions or provide text, and the assistant will respond.
        - You can also upload an image, and the assistant will answer questions about it.
        - Enter your OpenAI API key to start interacting with the model.
        - Use the 'Clear History' button to remove the conversation history.
        """)

        with gr.Row():
            openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)

        with gr.Row():
            image_input = gr.Image(label="Upload an Image", type="pil")
            input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)

        with gr.Row():
            reasoning_effort = gr.Dropdown(
                label="Reasoning Effort",
                choices=["low", "medium", "high"],
                value="medium",
            )
            submit_btn = gr.Button("Send")
            clear_btn = gr.Button("Clear History")

        chat_history = gr.Chatbot()

        # Wire the buttons: Send routes the inputs through chatbot(); Clear History
        # resets both the textbox and the conversation.
        submit_btn.click(
            fn=chatbot,
            inputs=[input_text, image_input, openai_api_key, reasoning_effort, chat_history],
            outputs=[input_text, chat_history],
        )
        clear_btn.click(fn=clear_history, inputs=[], outputs=[input_text, chat_history])

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
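    # Optional: demo.launch(share=True) would also expose a temporary public
    # share link (a standard Gradio launch option).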