Spaces:

shukdevdatta123
/

Multi-modal-o1-Chatbot

Running

App Files Files Community

shukdevdatta123 committed on Mar 7

Commit

eaa4360

verified ·

1 Parent(s): 30fb1f4

Create app.py

Browse files

Files changed (1) hide show

app.py +100 -0

app.py ADDED Viewed

	@@ -0,0 +1,100 @@

+import gradio as gr
+import openai
+from PIL import Image
+import io
+import base64
def generate_response(prompt, openai_api_key, image_info="", reasoning_effort="medium",
                      max_completion_tokens=300):
    """Send a single-turn chat request to the "o1" model and return the reply text.

    Args:
        prompt: The user's text question.
        openai_api_key: API key used for this request. A falsy value
            short-circuits with an error string and no API call is made.
        image_info: Optional text appended verbatim to the prompt as
            additional context about an image.
        reasoning_effort: Forwarded as the request's ``reasoning_effort`` field.
        max_completion_tokens: Forwarded as the request's
            ``max_completion_tokens`` field (defaults to the previous cap of 300).

    Returns:
        The assistant message content on success; otherwise a human-readable
        string beginning with "Error". API failures are reported as text and
        never raised, so the UI can display them directly.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    # Combine text prompt with optional image info.
    full_prompt = prompt
    if image_info:
        full_prompt += f"\n\nAdditional context about the image: {image_info}"

    # NOTE: no `temperature` and no `max_tokens` here. The o-series reasoning
    # models reject a custom temperature and require `max_completion_tokens`
    # in place of `max_tokens`.
    request = {
        "model": "o1",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": full_prompt},
        ],
        "max_completion_tokens": max_completion_tokens,
        "reasoning_effort": reasoning_effort,
    }

    try:
        # The key is passed per request instead of being written to the
        # process-wide `openai.api_key`, so concurrent users of the app
        # never end up sharing (or overwriting) each other's credentials.
        client_cls = getattr(openai, "OpenAI", None)
        if client_cls is not None:
            # openai>=1.0: client object, attribute-style response.
            client = client_cls(api_key=openai_api_key)
            response = client.chat.completions.create(**request)
            return response.choices[0].message.content
        # openai<1.0: module-level resource, dict-style response.
        response = openai.ChatCompletion.create(api_key=openai_api_key, **request)
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        return f"Error calling OpenAI API: {str(e)}"
def get_base64_string_from_image(pil_image):
    """Serialize an image as PNG and return the bytes as a base64 text string.

    Args:
        pil_image: Object exposing ``save(fp, format=...)``; it is asked to
            write itself in "PNG" format to an in-memory buffer.

    Returns:
        The UTF-8 decoded base64 encoding of the bytes written to the buffer.
    """
    with io.BytesIO() as png_buffer:
        pil_image.save(png_buffer, format="PNG")
        encoded = base64.b64encode(png_buffer.getvalue())
    return encoded.decode("utf-8")
def chatbot(input_text, image, openai_api_key, reasoning_effort, history=None):
    """Handle one chat turn for the Gradio UI.

    Args:
        input_text: The user's text question.
        image: Optional uploaded image. May already be a PIL image (as
            delivered by ``gr.Image(type="pil")``) or anything
            ``Image.open`` accepts; ``None`` means no image.
        openai_api_key: API key forwarded to ``generate_response``.
        reasoning_effort: Reasoning-effort setting forwarded to
            ``generate_response``.
        history: List of ``(user, assistant)`` message pairs shown so far.
            ``None`` starts a new list.

    Returns:
        A ``("", history)`` tuple: an empty string for the text box and the
        history with this turn appended.
    """
    # A `history=[]` default would be one list shared by every call (and so
    # by every user); a missing history must start a fresh list instead.
    if history is None:
        history = []

    image_info = ""
    if image is not None:
        try:
            # An already-decoded PIL image must not go through Image.open,
            # which only accepts paths / file objects and would always fail.
            pil_image = image if isinstance(image, Image.Image) else Image.open(image)
            image_info = get_base64_string_from_image(pil_image)
        except Exception as e:
            image_info = f"Error reading image: {e}"

    # Combine user input with image info (if any).
    response = generate_response(input_text, openai_api_key, image_info, reasoning_effort)

    history.append((f"User: {input_text}", f"Assistant: {response}"))
    return "", history
def clear_history():
    """Return reset values: an empty string and a new, empty history list."""
    cleared_text = ""
    cleared_history = []
    return cleared_text, cleared_history
def create_interface():
    """Build the Gradio Blocks UI for the chatbot and wire up its buttons.

    Layout: an API-key box, an image upload next to the question box, a
    reasoning-effort dropdown with Send / Clear History buttons, and the
    chat transcript.

    Returns:
        The constructed ``gr.Blocks`` app; the caller is responsible for
        launching it.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Multimodal Chatbot (Text + Image)")

        with gr.Row():
            openai_api_key = gr.Textbox(
                label="Enter OpenAI API Key",
                type="password",
                placeholder="sk-...",
                interactive=True,
            )

        with gr.Row():
            image_input = gr.Image(label="Upload an Image", type="pil")
            input_text = gr.Textbox(
                label="Enter Text Question",
                placeholder="Ask a question or provide text",
                lines=2,
            )

        with gr.Row():
            reasoning_effort = gr.Dropdown(
                label="Reasoning Effort",
                choices=["low", "medium", "high"],
                value="medium",
                # Dropdown's helper-text keyword is `info`; `description`
                # is not a Dropdown parameter.
                info="Select the reasoning effort for generating the response.",
            )
            submit_btn = gr.Button("Send")
            clear_btn = gr.Button("Clear History")

        chat_history = gr.Chatbot()

        # Button interactions
        submit_btn.click(
            fn=chatbot,
            inputs=[input_text, image_input, openai_api_key, reasoning_effort, chat_history],
            outputs=[input_text, chat_history],
        )
        # clear_history returns ("", []): the empty string belongs to the
        # text box and the empty list to the transcript, mirroring the
        # outputs of the Send handler.
        clear_btn.click(fn=clear_history, inputs=[], outputs=[input_text, chat_history])

    return demo
# Script entry point: only when executed directly (not on import), build the
# Blocks app under the module-level name `demo` and start serving it.
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()