Update app.py
app.py
CHANGED
@@ -36,44 +36,48 @@ def encode_image_to_base64(image):
     image.save(buffered, format="PNG")
     return base64.b64encode(buffered.getvalue()).decode('utf-8')
 
+
 def analyze_image(image):
     client = OpenAI(api_key=OPENAI_API_KEY)
     base64_image = encode_image_to_base64(image)
 
-    messages = [
+    # Build the list-of-dicts prompt:
+    prompt_dict = [
+        {
+            "type": "text",
+            "text": """Your task is to determine if the image is surprising or not surprising.
+            If the image is surprising, determine which element, figure, or object in the image is making the image surprising and write it only in one sentence with no more than 6 words.
+            Otherwise, write 'NA'.
+            Also, rate how surprising the image is on a scale of 1-5, where 1 is not surprising at all and 5 is highly surprising.
+            Provide the response as a JSON with the following structure:
+            {
+                "label": "[surprising OR not surprising]",
+                "element": "[element]",
+                "rating": [1-5]
+            }"""
+        },
         {
-            "role": "user",
-            "content": [
-                {
-                    "type": "text",
-                    "text": """Your task is to determine if the image is surprising or not surprising.
-                    if the image is surprising, determine which element, figure or object in the image is making the image surprising and write it only in one sentence with no more then 6 words, otherwise, write 'NA'.
-                    Also rate how surprising the image is on a scale of 1-5, where 1 is not surprising at all and 5 is highly surprising.
-                    Provide the response as a JSON with the following structure:
-                    {
-                        "label": "[surprising OR not surprising]",
-                        "element": "[element]",
-                        "rating": [1-5]
-                    }"""
-                },
-                {
-                    "type": "image_url",
-                    "image_url": {
-                        "url": f"data:image/jpeg;base64,{base64_image}"
-                    }
-                }
-            ]
+            "type": "image_url",
+            "image_url": {
+                "url": f"data:image/jpeg;base64,{base64_image}"
+            }
         }
     ]
 
+    # JSON-encode the list so "content" is a string
+    json_prompt = json.dumps(prompt_dict)
+
     response = client.chat.completions.create(
         model="gpt-4o-mini",
-        messages=messages,
+        messages=[
+            {
+                "role": "user",
+                "content": json_prompt,  # must be a string
+            }
+        ],
         max_tokens=100,
         temperature=0.1,
-        response_format={
-            "type": "json_object"
-        }
+        response_format={"type": "json_object"}
     )
 
     return response.choices[0].message.content
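A caveat on the new request shape above: client.chat.completions.create() also accepts the content-part list directly, and json.dumps() collapses it into one plain-text string, so the image_url part arrives as text rather than as an image input. A minimal sketch of the documented multimodal form follows; the function name and the abbreviated prompt are illustrative, not code from app.py:

    # Sketch (not from this commit): passing the content-part list directly
    # instead of a json.dumps() string, per the Chat Completions vision format.
    from openai import OpenAI

    def analyze_image_direct(client: OpenAI, base64_image: str) -> str:
        # The prompt text is abbreviated; the full prompt is in the hunk above.
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": [  # a list of parts, not a JSON-encoded string
                        {"type": "text", "text": "Is this image surprising? Answer as JSON."},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                        },
                    ],
                }
            ],
            max_tokens=100,
            temperature=0.1,
            response_format={"type": "json_object"},
        )
        return response.choices[0].message.content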
@@ -102,7 +106,7 @@ def process_image_detection(image, target_label, surprise_rating):
     original_size = image.size
 
     # Calculate relative font size based on image dimensions
-    base_fontsize = min(original_size) / 40  # Adjust this divisor
+    base_fontsize = min(original_size) / 40  # Adjust this divisor as needed
 
     owlv2_processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
     owlv2_model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16").to(device)
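The detection loop that drives this processor/model pair sits outside the hunk. For orientation, a minimal sketch of typical OWLv2 zero-shot detection via the transformers API, assuming a single text query; the helper name and threshold are assumptions, not code from app.py:

    # Sketch (not in this diff): zero-shot detection with the objects above.
    import torch
    from PIL import Image

    def detect_element(processor, model, image: Image.Image, label: str, device: str):
        inputs = processor(text=[[label]], images=image, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model(**inputs)
        # target_sizes is (height, width); PIL's image.size is (width, height)
        target_sizes = torch.tensor([image.size[::-1]])
        results = processor.post_process_object_detection(
            outputs, threshold=0.1, target_sizes=target_sizes
        )[0]
        return results  # dict with "scores", "labels", "boxes"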
@@ -125,7 +129,6 @@ def process_image_detection(image, target_label, surprise_rating):
 
     ax = plt.Axes(fig, [0., 0., 1., 1.])
     fig.add_axes(ax)
-
     plt.imshow(image)
 
     scores = results["scores"]
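The plt.Axes(fig, [0., 0., 1., 1.]) idiom above creates an axes spanning the entire figure in figure-fraction coordinates ([left, bottom, width, height]), which is what lets the image render edge to edge. A standalone sketch of the same idiom, with placeholder data:

    # Sketch: a full-bleed axes with no margins around the drawn image.
    import matplotlib.pyplot as plt
    import numpy as np

    fig = plt.figure(figsize=(4, 4))
    ax = plt.Axes(fig, [0., 0., 1., 1.])  # spans the whole figure
    ax.set_axis_off()
    fig.add_axes(ax)
    ax.imshow(np.random.rand(8, 8))
    plt.close(fig)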
@@ -154,18 +157,18 @@ def process_image_detection(image, target_label, surprise_rating):
         mask = masks[0].numpy() if isinstance(masks[0], torch.Tensor) else masks[0]
         show_mask(mask, ax=ax)
 
-        # Draw rectangle
+        # Draw rectangle around the detected area
         rect = patches.Rectangle(
             (box[0], box[1]),
             box[2] - box[0],
             box[3] - box[1],
-            linewidth=max(2, min(original_size) / 500),
+            linewidth=max(2, min(original_size) / 500),
             edgecolor='red',
             facecolor='none'
         )
         ax.add_patch(rect)
 
-        #
+        # Confidence score
         plt.text(
             box[0], box[1] - base_fontsize,
             f'{max_score:.2f}',
@@ -175,7 +178,7 @@ def process_image_detection(image, target_label, surprise_rating):
             bbox=dict(facecolor='white', alpha=0.7, edgecolor='none', pad=2)
         )
 
-        #
+        # Label + rating
        plt.text(
             box[2] + base_fontsize / 2, box[1],
             f'Unexpected (Rating: {surprise_rating}/5)\n{target_label}',
@@ -188,18 +191,20 @@ def process_image_detection(image, target_label, surprise_rating):
 
     plt.axis('off')
 
-    # Save
+    # Save figure to buffer
     buf = io.BytesIO()
-    plt.savefig(
+    plt.savefig(
+        buf,
+        format='png',
+        dpi=dpi,
+        bbox_inches='tight',
+        pad_inches=0,
+        metadata={'dpi': original_dpi}
+    )
     buf.seek(0)
     plt.close()
 
-    #
+    # Convert buffer back to PIL
     output_image = Image.open(buf)
     output_image = output_image.resize(original_size, Image.Resampling.LANCZOS)
 
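The dpi and original_dpi names in the new savefig() call are defined earlier in app.py, outside this diff. Note that bbox_inches='tight' crops the output to the drawn content, which is why the code resizes back to original_size afterwards. A self-contained sketch of the same buffer round trip, with a placeholder figure and a (640, 480) stand-in for original_size:

    # Sketch: matplotlib figure -> PNG bytes in memory -> PIL image.
    import io
    import matplotlib.pyplot as plt
    from PIL import Image

    fig, ax = plt.subplots()
    ax.imshow([[0, 1], [1, 0]])
    ax.axis('off')

    buf = io.BytesIO()
    # bbox_inches='tight' crops to content, so the PNG's pixel size can
    # differ from figsize * dpi; hence the resize back afterwards.
    fig.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
    plt.close(fig)
    buf.seek(0)
    output = Image.open(buf).resize((640, 480), Image.Resampling.LANCZOS)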
@@ -220,29 +225,34 @@ def process_and_analyze(image):
     try:
         # Handle different input types
         if isinstance(image, tuple):
-            image = image[0]
+            image = image[0]
         if isinstance(image, np.ndarray):
             image = Image.fromarray(image)
         if not isinstance(image, Image.Image):
             raise ValueError("Invalid image format")
 
-        # Analyze image
+        # Analyze image with GPT
         gpt_response = analyze_image(image)
         response_data = json.loads(gpt_response)
 
+        # If surprising, try to detect the element
         if response_data["label"].lower() == "surprising" and response_data["element"].lower() != "na":
             result_buf = process_image_detection(image, response_data["element"], response_data["rating"])
             result_image = Image.open(result_buf)
-            analysis_text =
+            analysis_text = (
+                f"Label: {response_data['label']}\n"
+                f"Element: {response_data['element']}\n"
+                f"Rating: {response_data['rating']}/5"
+            )
             return result_image, analysis_text
         else:
+            # If not surprising or element=NA
             return image, "Not Surprising"
 
     except Exception as e:
         return None, f"Error processing image: {str(e)}"
 
 
-# Create Gradio interface
 def create_interface():
     with gr.Blocks() as demo:
         gr.Markdown("# Image Surprise Analysis")
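json.loads() raises on malformed model output, and the later key lookups raise KeyError if a field is missing; both land in the broad except above and surface only as a generic error string. A sketch of more explicit validation; the helper is illustrative, not part of this commit:

    # Sketch (not from this commit): validate the GPT JSON reply explicitly.
    import json

    def parse_analysis(gpt_response: str) -> dict:
        # json.loads raises json.JSONDecodeError on malformed output
        data = json.loads(gpt_response)
        # Check for the three fields the prompt asks for
        missing = {"label", "element", "rating"} - data.keys()
        if missing:
            raise ValueError(f"GPT reply missing fields: {missing}")
        return {
            "label": str(data["label"]).lower(),
            "element": str(data["element"]),
            "rating": int(data["rating"]),
        }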
@@ -267,4 +277,4 @@ def create_interface():
 
 if __name__ == "__main__":
     demo = create_interface()
-    demo.launch()
+    demo.launch()