reab5555 committed
Commit 6b3604d · verified · Parent(s): e674f2c

Update app.py

Files changed (1):
  app.py +148 -132

app.py CHANGED
@@ -119,138 +119,154 @@ def show_mask(mask, ax, random_color=False):
 
 
 def process_image_detection(image, target_label, surprise_rating):
-    # Handle different image input types
-    if isinstance(image, tuple):
-        if len(image) > 0 and image[0] is not None:
-            image = Image.fromarray(image[0])
-        else:
-            raise ValueError("Invalid image tuple provided")
-    elif isinstance(image, np.ndarray):
-        image = Image.fromarray(image)
-    elif isinstance(image, str):
-        image = Image.open(image)
-
-    # Ensure image is in PIL Image format
-    if not isinstance(image, Image.Image):
-        raise ValueError("Input must be a PIL Image, numpy array, or valid image path")
-
-    # Ensure image is in RGB mode
-    if image.mode != 'RGB':
-        image = image.convert('RGB')
-
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-
-    # Get original image DPI and size
-    original_dpi = image.info.get('dpi', (72, 72))
-    original_size = image.size
-
-    # Calculate relative font size based on image dimensions
-    base_fontsize = min(original_size) / 40
-
-    owlv2_processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
-    owlv2_model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16").to(device)
-
-    sam_processor = AutoProcessor.from_pretrained("facebook/sam-vit-base")
-    sam_model = AutoModelForMaskGeneration.from_pretrained("facebook/sam-vit-base").to(device)
-
-    image_np = np.array(image)
-
-    inputs = owlv2_processor(text=[target_label], images=image, return_tensors="pt").to(device)
-    with torch.no_grad():
-        outputs = owlv2_model(**inputs)
-
-    target_sizes = torch.tensor([image.size[::-1]]).to(device)
-    results = owlv2_processor.post_process_object_detection(outputs, target_sizes=target_sizes)[0]
-
-    dpi = 300
-    figsize = (original_size[0] / dpi, original_size[1] / dpi)
-    fig = plt.figure(figsize=figsize, dpi=dpi)
-
-    ax = plt.Axes(fig, [0., 0., 1., 1.])
-    fig.add_axes(ax)
-
-    plt.imshow(image)
-
-    scores = results["scores"]
-    if len(scores) > 0:
-        max_score_idx = scores.argmax().item()
-        max_score = scores[max_score_idx].item()
-
-        if max_score > 0.2:
-            box = results["boxes"][max_score_idx].cpu().numpy()
-
-            sam_inputs = sam_processor(
-                image,
-                input_boxes=[[[box[0], box[1], box[2], box[3]]]],
-                return_tensors="pt"
-            ).to(device)
-
-            with torch.no_grad():
-                sam_outputs = sam_model(**sam_inputs)
-
-            masks = sam_processor.image_processor.post_process_masks(
-                sam_outputs.pred_masks.cpu(),
-                sam_inputs["original_sizes"].cpu(),
-                sam_inputs["reshaped_input_sizes"].cpu()
-            )
-
-            mask = masks[0].numpy() if isinstance(masks[0], torch.Tensor) else masks[0]
-            show_mask(mask, ax=ax)
-
-            # Draw rectangle with increased line width
-            rect = patches.Rectangle(
-                (box[0], box[1]),
-                box[2] - box[0],
-                box[3] - box[1],
-                linewidth=max(2, min(original_size) / 500),
-                edgecolor='red',
-                facecolor='none'
-            )
-            ax.add_patch(rect)
-
-            # Add confidence score with improved visibility
-            plt.text(
-                box[0], box[1] - base_fontsize,
-                f'{max_score:.2f}',
-                color='red',
-                fontsize=base_fontsize,
-                fontweight='bold',
-                bbox=dict(facecolor='white', alpha=0.7, edgecolor='none', pad=2)
-            )
-
-            # Add label and rating with improved visibility
-            plt.text(
-                box[2] + base_fontsize / 2, box[1],
-                f'Unexpected (Rating: {surprise_rating}/5)\n{target_label}',
-                color='red',
-                fontsize=base_fontsize,
-                fontweight='bold',
-                bbox=dict(facecolor='white', alpha=0.7, edgecolor='none', pad=2),
-                verticalalignment='bottom'
-            )
-
-    plt.axis('off')
-
-    # Save with high DPI
-    buf = io.BytesIO()
-    plt.savefig(buf,
-                format='png',
-                dpi=dpi,
-                bbox_inches='tight',
-                pad_inches=0,
-                metadata={'dpi': original_dpi})
-    buf.seek(0)
-    plt.close()
-
-    # Process final image
-    output_image = Image.open(buf)
-    output_image = output_image.resize(original_size, Image.Resampling.LANCZOS)
-
-    final_buf = io.BytesIO()
-    output_image.save(final_buf, format='PNG', dpi=original_dpi)
-    final_buf.seek(0)
-
-    return final_buf
+    try:
+        # Handle different image input types
+        if isinstance(image, tuple):
+            if len(image) > 0 and image[0] is not None:
+                if isinstance(image[0], np.ndarray):
+                    image = Image.fromarray(image[0])
+                else:
+                    image = image[0]
+            else:
+                raise ValueError("Invalid image tuple provided")
+        elif isinstance(image, np.ndarray):
+            image = Image.fromarray(image)
+        elif isinstance(image, str):
+            image = Image.open(image)
+
+        # Ensure image is in PIL Image format
+        if not isinstance(image, Image.Image):
+            raise ValueError(f"Input must be a PIL Image, got {type(image)}")
+
+        # Ensure image is in RGB mode
+        if image.mode != 'RGB':
+            image = image.convert('RGB')
+
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        print(f"Using device: {device}")  # Debug print
+
+        # Get original image DPI and size
+        original_dpi = image.info.get('dpi', (72, 72))
+        original_size = image.size
+        print(f"Image size: {original_size}")  # Debug print
+
+        # Calculate relative font size based on image dimensions
+        base_fontsize = min(original_size) / 40
+
+        print("Loading models...")  # Debug print
+        owlv2_processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
+        owlv2_model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16").to(device)
+        sam_processor = AutoProcessor.from_pretrained("facebook/sam-vit-base")
+        sam_model = AutoModelForMaskGeneration.from_pretrained("facebook/sam-vit-base").to(device)
+
+        print("Running object detection...")  # Debug print
+        inputs = owlv2_processor(text=[target_label], images=image, return_tensors="pt").to(device)
+        with torch.no_grad():
+            outputs = owlv2_model(**inputs)
+
+        target_sizes = torch.tensor([image.size[::-1]]).to(device)
+        results = owlv2_processor.post_process_object_detection(outputs, target_sizes=target_sizes)[0]
+
+        dpi = 300
+        figsize = (original_size[0] / dpi, original_size[1] / dpi)
+        fig = plt.figure(figsize=figsize, dpi=dpi)
+        ax = plt.Axes(fig, [0., 0., 1., 1.])
+        fig.add_axes(ax)
+        ax.imshow(image)
+
+        scores = results["scores"]
+        if len(scores) > 0:
+            max_score_idx = scores.argmax().item()
+            max_score = scores[max_score_idx].item()
+
+            if max_score > 0.2:
+                print("Processing detection results...")  # Debug print
+                box = results["boxes"][max_score_idx].cpu().numpy()
+
+                print("Running SAM model...")  # Debug print
+                # Convert image to numpy array if needed for SAM
+                if isinstance(image, Image.Image):
+                    image_np = np.array(image)
+                else:
+                    image_np = image
+
+                sam_inputs = sam_processor(
+                    image_np,  # Use numpy array here
+                    input_boxes=[[[box[0], box[1], box[2], box[3]]]],
+                    return_tensors="pt"
+                ).to(device)
+
+                with torch.no_grad():
+                    sam_outputs = sam_model(**sam_inputs)
+
+                masks = sam_processor.image_processor.post_process_masks(
+                    sam_outputs.pred_masks.cpu(),
+                    sam_inputs["original_sizes"].cpu(),
+                    sam_inputs["reshaped_input_sizes"].cpu()
+                )
+
+                print(f"Mask type: {type(masks)}, Mask shape: {len(masks)}")  # Debug print
+                mask = masks[0]
+                if isinstance(mask, torch.Tensor):
+                    mask = mask.numpy()
+
+                show_mask(mask, ax=ax)
+
+                rect = patches.Rectangle(
+                    (box[0], box[1]),
+                    box[2] - box[0],
+                    box[3] - box[1],
+                    linewidth=max(2, min(original_size) / 500),
+                    edgecolor='red',
+                    facecolor='none'
+                )
+                ax.add_patch(rect)
+
+                plt.text(
+                    box[0], box[1] - base_fontsize,
+                    f'{max_score:.2f}',
+                    color='red',
+                    fontsize=base_fontsize,
+                    fontweight='bold',
+                    bbox=dict(facecolor='white', alpha=0.7, edgecolor='none', pad=2)
+                )
+
+                plt.text(
+                    box[2] + base_fontsize / 2, box[1],
+                    f'Unexpected (Rating: {surprise_rating}/5)\n{target_label}',
+                    color='red',
+                    fontsize=base_fontsize,
+                    fontweight='bold',
+                    bbox=dict(facecolor='white', alpha=0.7, edgecolor='none', pad=2),
+                    verticalalignment='bottom'
+                )
+
+        plt.axis('off')
+
+        print("Saving final image...")  # Debug print
+        buf = io.BytesIO()
+        plt.savefig(buf,
+                    format='png',
+                    dpi=dpi,
+                    bbox_inches='tight',
+                    pad_inches=0,
+                    metadata={'dpi': original_dpi})
+        buf.seek(0)
+        plt.close()
+
+        output_image = Image.open(buf)
+        output_image = output_image.resize(original_size, Image.Resampling.LANCZOS)
+
+        final_buf = io.BytesIO()
+        output_image.save(final_buf, format='PNG', dpi=original_dpi)
+        final_buf.seek(0)
+
+        return final_buf
+
+    except Exception as e:
+        print(f"Process image detection error: {str(e)}")  # Debug print
+        print(f"Error occurred at line {e.__traceback__.tb_lineno}")  # Debug print
+        raise
 
 
 def process_and_analyze(image):
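
Usage note: after this commit, process_image_detection accepts a PIL image, a numpy array, a tuple, or a file path, and returns the annotated result as a PNG io.BytesIO buffer. A minimal calling sketch, assuming a local example.jpg and illustrative target_label / surprise_rating values (none of this wiring is part of the commit itself):

    from PIL import Image

    # Hypothetical inputs for illustration only.
    image = Image.open("example.jpg")
    buf = process_image_detection(image, target_label="a dog", surprise_rating=4)

    # The function returns an io.BytesIO; write it out as a PNG file.
    with open("annotated.png", "wb") as f:
        f.write(buf.getvalue())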