Spaces:

reab5555
/

AI-Image-Anomaly-Detection

Sleeping

App Files Files Community

reab5555 committed on Jan 7

Commit

f57fde9

verified ·

1 Parent(s): 3da6b4f

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -26

app.py CHANGED Viewed

@@ -19,42 +19,48 @@ load_dotenv()
 OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
-def encode_image_to_base64(image):
-    # If image is a tuple (as sometimes provided by Gradio), take the first element
-    if isinstance(image, tuple):
-        image = image[0]
-    # If image is a numpy array, convert to PIL Image
-    if isinstance(image, np.ndarray):
-        image = Image.fromarray(image)
-    # Ensure image is in PIL Image format
     if not isinstance(image, Image.Image):
-        raise ValueError("Input must be a PIL Image, numpy array, or tuple containing an image")
     buffered = io.BytesIO()
-    image.save(buffered, format="PNG")
     return base64.b64encode(buffered.getvalue()).decode('utf-8')
 def analyze_image(image):
     client = OpenAI(api_key=OPENAI_API_KEY)
-    base64_image = encode_image_to_base64(image)
-    # Build the list-of-dicts prompt:
     prompt_dict = [
         {
             "type": "text",
-            "text": """Your task is to determine if the image is surprising or not surprising.
-            If the image is surprising, determine which element, figure, or object in the image is making the image surprising and write it only in one sentence with no more than 6 words.
-            Otherwise, write 'NA'.
-            Also, rate how surprising the image is on a scale of 1-5, where 1 is not surprising at all and 5 is highly surprising.
-            Provide the response as a JSON with the following structure:
             {
-                "label": "[surprising OR not surprising]",
-                "element": "[element]",
-                "rating": [1-5]
-            }"""
         },
         {
             "type": "image_url",
@@ -64,15 +70,16 @@ def analyze_image(image):
         }
     ]
-    # JSON-encode the list so "content" is a string
     json_prompt = json.dumps(prompt_dict)
     response = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
             {
                 "role": "user",
-                "content": json_prompt,  # must be a string
             }
         ],
         max_tokens=100,
@@ -83,6 +90,7 @@ def analyze_image(image):
     return response.choices[0].message.content
 def show_mask(mask, ax, random_color=False):
     if random_color:
         color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)

 OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
def resize_and_compress(image, max_width=800, max_height=800, quality=50):
    """Downscale an image to fit a bounding box and return it as Base64 JPEG.

    The image is shrunk (never enlarged) so that it fits inside
    ``max_width`` x ``max_height`` while keeping its aspect ratio, then
    encoded as JPEG at the given ``quality``. This reduces the payload size
    but does not enforce a hard upper bound on it.

    Args:
        image: A PIL ``Image``, a numpy array convertible with
            ``Image.fromarray``, or a tuple whose first element is one of
            those (the previous encoder accepted tuples for Gradio input).
        max_width: Maximum width of the encoded image in pixels.
        max_height: Maximum height of the encoded image in pixels.
        quality: JPEG quality setting passed to ``Image.save``.

    Returns:
        The JPEG bytes encoded as a Base64 ``str``.

    Raises:
        ValueError: If ``image`` is not a supported image type.
    """
    # Accept the same input shapes the earlier encoder handled.
    if isinstance(image, tuple):
        image = image[0]
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    if not isinstance(image, Image.Image):
        raise ValueError("Input must be a PIL Image")

    width, height = image.size
    if width > max_width or height > max_height:
        # Pick the side that is the binding constraint by comparing the
        # image's aspect ratio with the box's (cross-multiplied to stay in
        # exact arithmetic); this also works for non-square bounds.
        if width * max_height > height * max_width:
            new_width = max_width
            new_height = int(max_width * height / width)
        else:
            new_height = max_height
            new_width = int(max_height * width / height)
        # Extreme aspect ratios could round a side down to 0, which
        # ``resize`` rejects.
        new_size = (max(1, int(new_width)), max(1, int(new_height)))
        image = image.resize(new_size, Image.Resampling.LANCZOS)

    # JPEG has no alpha channel or palette support; saving e.g. an RGBA or
    # P-mode image directly raises OSError.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")

    buffered = io.BytesIO()
    # Save as JPEG with reduced quality
    image.save(buffered, format="JPEG", quality=quality)
    return base64.b64encode(buffered.getvalue()).decode('utf-8')
 def analyze_image(image):
     client = OpenAI(api_key=OPENAI_API_KEY)
+    # Step 1: Resize + compress to keep the Base64 string under 1 MB
+    base64_image = resize_and_compress(image, max_width=800, max_height=800, quality=50)
+    # Build the list-of-dicts prompt
     prompt_dict = [
         {
             "type": "text",
+            "text": """Your task is to determine if the image is surprising or not.
+            If the image is surprising, which element is surprising (max 6 words).
+            Otherwise, 'NA'. Also rate how surprising (1-5).
+            Return JSON like:
             {
+              "label": "[surprising or not surprising]",
+              "element": "[element]",
+              "rating": [1-5]
+            }
+            """
         },
         {
             "type": "image_url",
         }
     ]
+    # JSON-encode to ensure content is a string
     json_prompt = json.dumps(prompt_dict)
+    # Send request
     response = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[
             {
                 "role": "user",
+                "content": json_prompt
             }
         ],
         max_tokens=100,
     return response.choices[0].message.content
 def show_mask(mask, ax, random_color=False):
     if random_color:
         color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)