shukdevdatta123 committed
Commit 5016e38 · verified · 1 Parent(s): 5b332f1

Update app.py

Files changed (1)
  1. app.py +20 -29
app.py CHANGED
@@ -1,32 +1,35 @@
  import gradio as gr
  import openai
+ import base64
  from PIL import Image
  import io
- import base64

- # Function to send the request to OpenAI API
- def generate_response(prompt, openai_api_key, image_info="", reasoning_effort="medium"):
+ # Function to send the request to OpenAI API with an image or text input
+ def generate_response(input_text, image, openai_api_key, reasoning_effort="medium"):
      if not openai_api_key:
          return "Error: No API key provided."

      openai.api_key = openai_api_key

-     # Combine text prompt with optional image info
-     full_prompt = prompt
-     if image_info:
-         full_prompt += f"\n\nAdditional context about the image: {image_info}"
+     # Process the input depending on whether it's text or an image
+     if image:
+         # Convert the image to base64 string
+         image_info = get_base64_string_from_image(image)
+         input_text = f"data:image/png;base64,{image_info}"
+
+     # Prepare the messages for OpenAI API
+     messages = [
+         {"role": "user", "content": [{"type": "image_url", "image_url": {"url": input_text}}]}
+     ]

      try:
-         # Call OpenAI API with the specified model ("o1")
+         # Call OpenAI API with the "o1" model
          response = openai.ChatCompletion.create(
-             model="o1",  # use model "o1"
-             messages=[
-                 {"role": "system", "content": "You are a helpful assistant."},
-                 {"role": "user", "content": full_prompt},
-             ],
-             max_completion_tokens=300,  # Use max_completion_tokens instead of max_tokens
-             reasoning_effort=reasoning_effort  # Include reasoning_effort in the request
+             model="o1",  # Using model "o1"
+             messages=messages,
+             reasoning_effort=reasoning_effort  # Set reasoning_effort for the response
          )
+
          return response["choices"][0]["message"]["content"]
      except Exception as e:
          return f"Error calling OpenAI API: {str(e)}"
@@ -42,19 +45,7 @@ def get_base64_string_from_image(pil_image):

  # The function that will be used by Gradio interface
  def chatbot(input_text, image, openai_api_key, reasoning_effort, history=[]):
-     image_info = ""
-
-     # If an image is uploaded, convert it to base64 for reference (this does NOT analyze the image directly)
-     if image:
-         try:
-             # Ensure the image is a PIL object (Gradio returns a PIL Image object)
-             image = Image.open(image)
-             image_info = get_base64_string_from_image(image)  # Store base64 string for the image
-         except Exception as e:
-             image_info = f"Error reading image: {e}"
-
-     # Combine user input with image info (if any)
-     response = generate_response(input_text, openai_api_key, image_info, reasoning_effort)
+     response = generate_response(input_text, image, openai_api_key, reasoning_effort)

      # Append the response to the history
      history.append((f"User: {input_text}", f"Assistant: {response}"))
@@ -97,4 +88,4 @@ def create_interface():
  # Run the interface
  if __name__ == "__main__":
      demo = create_interface()
-     demo.launch()
+     demo.launch()
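
For context, a minimal standalone sketch (not part of the commit) of the data URL and message payload that the rewritten generate_response builds when an image is supplied. The body of get_base64_string_from_image below is an assumption based only on its signature in the hunk header, and the sample image stands in for a Gradio upload; the repository's own helper may differ.

import base64
import io
from PIL import Image

def get_base64_string_from_image(pil_image):
    # Assumed implementation: encode the PIL image as PNG and return base64 text.
    buffer = io.BytesIO()
    pil_image.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")

image = Image.new("RGB", (64, 64), color="white")  # stand-in for a Gradio image upload
image_info = get_base64_string_from_image(image)
input_text = f"data:image/png;base64,{image_info}"

# Payload shape taken verbatim from the updated generate_response in this diff.
messages = [
    {"role": "user", "content": [{"type": "image_url", "image_url": {"url": input_text}}]}
]
print(messages[0]["content"][0]["image_url"]["url"][:40])  # data:image/png;base64,...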