Spaces:

reab5555
/

AI-Image-Anomaly-Detection

Running

App Files Community

reab5555 committed on Jan 7

Commit

76cd7ac

verified ·

1 Parent(s): 4c9f62d

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -3

app.py CHANGED Viewed

@@ -106,6 +106,25 @@ def show_mask(mask, ax, random_color=False):
 def process_image_detection(image, target_label, surprise_rating):
     device = "cuda" if torch.cuda.is_available() else "cpu"
     # Get original image DPI and size
@@ -113,7 +132,7 @@ def process_image_detection(image, target_label, surprise_rating):
     original_size = image.size
     # Calculate relative font size based on image dimensions
-    base_fontsize = min(original_size) / 40  # Adjust this divisor to change overall font size
     owlv2_processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
     owlv2_model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16").to(device)
@@ -130,7 +149,7 @@ def process_image_detection(image, target_label, surprise_rating):
     target_sizes = torch.tensor([image.size[::-1]]).to(device)
     results = owlv2_processor.post_process_object_detection(outputs, target_sizes=target_sizes)[0]
-    dpi = 300  # Increased DPI for better text rendering
     figsize = (original_size[0] / dpi, original_size[1] / dpi)
     fig = plt.figure(figsize=figsize, dpi=dpi)
@@ -170,7 +189,7 @@ def process_image_detection(image, target_label, surprise_rating):
                 (box[0], box[1]),
                 box[2] - box[0],
                 box[3] - box[1],
-                linewidth=max(2, min(original_size) / 500),  # Scale line width with image size
                 edgecolor='red',
                 facecolor='none'
             )

 def process_image_detection(image, target_label, surprise_rating):
+    # Handle different image input types
+    if isinstance(image, tuple):
+        if len(image) > 0 and image[0] is not None:
+            image = Image.fromarray(image[0])
+        else:
+            raise ValueError("Invalid image tuple provided")
+    elif isinstance(image, np.ndarray):
+        image = Image.fromarray(image)
+    elif isinstance(image, str):
+        image = Image.open(image)
+    # Ensure image is in PIL Image format
+    if not isinstance(image, Image.Image):
+        raise ValueError("Input must be a PIL Image, numpy array, or valid image path")
+    # Ensure image is in RGB mode
+    if image.mode != 'RGB':
+        image = image.convert('RGB')
     device = "cuda" if torch.cuda.is_available() else "cpu"
     # Get original image DPI and size
     original_size = image.size
     # Calculate relative font size based on image dimensions
+    base_fontsize = min(original_size) / 40
     owlv2_processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
     owlv2_model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16").to(device)
     target_sizes = torch.tensor([image.size[::-1]]).to(device)
     results = owlv2_processor.post_process_object_detection(outputs, target_sizes=target_sizes)[0]
+    dpi = 300
     figsize = (original_size[0] / dpi, original_size[1] / dpi)
     fig = plt.figure(figsize=figsize, dpi=dpi)
                 (box[0], box[1]),
                 box[2] - box[0],
                 box[3] - box[1],
+                linewidth=max(2, min(original_size) / 500),
                 edgecolor='red',
                 facecolor='none'
             )