Spaces:

reab5555
/

AI-Image-Anomaly-Detection

Sleeping

App Files Community

reab5555 committed on Jan 7

Commit

e1b87b0

verified ·

1 Parent(s): aff928e

Update app.py

Browse files

Files changed (1) hide show

app.py +167 -162

app.py CHANGED Viewed

@@ -143,168 +143,173 @@ def show_mask(mask, ax, random_color=False):
 def process_image_detection(image, target_label, surprise_rating):
-    try:
-        # Handle different image input types
-        if isinstance(image, tuple):
-            if len(image) > 0 and image[0] is not None:
-                if isinstance(image[0], np.ndarray):
-                    image = Image.fromarray(image[0])
-                else:
-                    image = image[0]
-            else:
-                raise ValueError("Invalid image tuple provided")
-        elif isinstance(image, np.ndarray):
-            image = Image.fromarray(image)
-        elif isinstance(image, str):
-            image = Image.open(image)
-        # Ensure image is in PIL Image format
-        if not isinstance(image, Image.Image):
-            raise ValueError(f"Input must be a PIL Image, got {type(image)}")
-        # Ensure image is in RGB mode
-        if image.mode != 'RGB':
-            image = image.convert('RGB')
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-        print(f"Using device: {device}")
-        # Get original image DPI and size
-        original_dpi = image.info.get('dpi', (72, 72))
-        original_size = image.size
-        print(f"Image size: {original_size}")
-        # Calculate relative font size based on image dimensions
-        base_fontsize = min(original_size) / 40
-        print("Loading models...")
-        owlv2_processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
-        owlv2_model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16").to(device)
-        sam_processor = AutoProcessor.from_pretrained("facebook/sam-vit-base")
-        sam_model = AutoModelForMaskGeneration.from_pretrained("facebook/sam-vit-base").to(device)
-        print("Running object detection...")
-        inputs = owlv2_processor(text=[target_label], images=image, return_tensors="pt").to(device)
-        with torch.no_grad():
-            outputs = owlv2_model(**inputs)
-        target_sizes = torch.tensor([image.size[::-1]]).to(device)
-        results = owlv2_processor.post_process_object_detection(outputs, target_sizes=target_sizes)[0]
-        dpi = 300
-        figsize = (original_size[0] / dpi, original_size[1] / dpi)
-        fig = plt.figure(figsize=figsize, dpi=dpi)
-        ax = plt.Axes(fig, [0., 0., 1., 1.])
-        fig.add_axes(ax)
-        ax.imshow(image)
-        scores = results["scores"]
-        if len(scores) > 0:
-            max_score_idx = scores.argmax().item()
-            max_score = scores[max_score_idx].item()
-            if max_score > 0.2:
-                print("Processing detection results...")
-                box = results["boxes"][max_score_idx].cpu().numpy()
-                print("Running SAM model...")
-                # Convert image to numpy array if needed for SAM
-                if isinstance(image, Image.Image):
-                    image_np = np.array(image)
-                else:
-                    image_np = image
-                sam_inputs = sam_processor(
-                    image_np,
-                    input_boxes=[[[box[0], box[1], box[2], box[3]]]],
-                    return_tensors="pt"
-                ).to(device)
-                with torch.no_grad():
-                    sam_outputs = sam_model(**sam_inputs)
-                masks = sam_processor.image_processor.post_process_masks(
-                    sam_outputs.pred_masks.cpu(),
-                    sam_inputs["original_sizes"].cpu(),
-                    sam_inputs["reshaped_input_sizes"].cpu()
-                )
-                print(f"Mask type: {type(masks)}, Mask shape: {len(masks)}")
-                mask = masks[0]
-                if isinstance(mask, torch.Tensor):
-                    mask = mask.numpy()
-                show_mask(mask, ax=ax)
-                rect = patches.Rectangle(
-                    (box[0], box[1]),
-                    box[2] - box[0],
-                    box[3] - box[1],
-                    linewidth=max(2, min(original_size) / 500),
-                    edgecolor='red',
-                    facecolor='none'
-                )
-                ax.add_patch(rect)
-                plt.text(
-                    box[0], box[1] - base_fontsize,
-                    f'{max_score:.2f}',
-                    color='red',
-                    fontsize=base_fontsize,
-                    fontweight='bold',
-                    bbox=dict(facecolor='white', alpha=0.7, edgecolor='none', pad=2)
-                )
-                plt.text(
-                    box[2] + base_fontsize / 2, box[1],
-                    f'Unexpected (Rating: {surprise_rating}/5)\n{target_label}',
-                    color='red',
-                    fontsize=base_fontsize,
-                    fontweight='bold',
-                    bbox=dict(facecolor='white', alpha=0.7, edgecolor='none', pad=2),
-                    verticalalignment='bottom'
-                )
-        plt.axis('off')
-        print("Saving final image...")
-        try:
-            # Save directly to buffer using savefig
-            buf = io.BytesIO()
-            fig.savefig(buf,
-                       format='png',
-                       dpi=dpi,
-                       bbox_inches='tight',
-                       pad_inches=0)
-            buf.seek(0)
-            # Open as PIL Image
-            output_image = Image.open(buf)
-            # Convert to RGB if needed
-            if output_image.mode != 'RGB':
-                output_image = output_image.convert('RGB')
-            # Resize to original size if needed
-            if output_image.size != original_size:
-                output_image = output_image.resize(original_size, Image.Resampling.LANCZOS)
-            # Save to final buffer
-            final_buf = io.BytesIO()
-            output_image.save(final_buf, format='PNG', dpi=original_dpi)
-            final_buf.seek(0)
-            # Cleanup
-            plt.close(fig)
-            buf.close()
-            return final_buf
-        except Exception as e:
-            print(f"Save error details: {str(e)}")
-            print(f"Figure type: {type(fig)}")
-            print(f"Canvas type: {type(fig.canvas)}")
-            raise
 def process_and_analyze(image):
     if image is None:

 def process_image_detection(image, target_label, surprise_rating):
+   try:
+       # Handle different image input types
+       if isinstance(image, tuple):
+           if len(image) > 0 and image[0] is not None:
+               if isinstance(image[0], np.ndarray):
+                   image = Image.fromarray(image[0])
+               else:
+                   image = image[0]
+           else:
+               raise ValueError("Invalid image tuple provided")
+       elif isinstance(image, np.ndarray):
+           image = Image.fromarray(image)
+       elif isinstance(image, str):
+           image = Image.open(image)
+       # Ensure image is in PIL Image format
+       if not isinstance(image, Image.Image):
+           raise ValueError(f"Input must be a PIL Image, got {type(image)}")
+       # Ensure image is in RGB mode
+       if image.mode != 'RGB':
+           image = image.convert('RGB')
+       device = "cuda" if torch.cuda.is_available() else "cpu"
+       print(f"Using device: {device}")
+       # Get original image DPI and size
+       original_dpi = image.info.get('dpi', (72, 72))
+       original_size = image.size
+       print(f"Image size: {original_size}")
+       # Calculate relative font size based on image dimensions
+       base_fontsize = min(original_size) / 40
+       print("Loading models...")
+       owlv2_processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
+       owlv2_model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16").to(device)
+       sam_processor = AutoProcessor.from_pretrained("facebook/sam-vit-base")
+       sam_model = AutoModelForMaskGeneration.from_pretrained("facebook/sam-vit-base").to(device)
+       print("Running object detection...")
+       inputs = owlv2_processor(text=[target_label], images=image, return_tensors="pt").to(device)
+       with torch.no_grad():
+           outputs = owlv2_model(**inputs)
+       target_sizes = torch.tensor([image.size[::-1]]).to(device)
+       results = owlv2_processor.post_process_object_detection(outputs, target_sizes=target_sizes)[0]
+       dpi = 300
+       figsize = (original_size[0] / dpi, original_size[1] / dpi)
+       fig = plt.figure(figsize=figsize, dpi=dpi)
+       ax = plt.Axes(fig, [0., 0., 1., 1.])
+       fig.add_axes(ax)
+       ax.imshow(image)
+       scores = results["scores"]
+       if len(scores) > 0:
+           max_score_idx = scores.argmax().item()
+           max_score = scores[max_score_idx].item()
+           if max_score > 0.2:
+               print("Processing detection results...")
+               box = results["boxes"][max_score_idx].cpu().numpy()
+               print("Running SAM model...")
+               # Convert image to numpy array if needed for SAM
+               if isinstance(image, Image.Image):
+                   image_np = np.array(image)
+               else:
+                   image_np = image
+               sam_inputs = sam_processor(
+                   image_np,
+                   input_boxes=[[[box[0], box[1], box[2], box[3]]]],
+                   return_tensors="pt"
+               ).to(device)
+               with torch.no_grad():
+                   sam_outputs = sam_model(**sam_inputs)
+               masks = sam_processor.image_processor.post_process_masks(
+                   sam_outputs.pred_masks.cpu(),
+                   sam_inputs["original_sizes"].cpu(),
+                   sam_inputs["reshaped_input_sizes"].cpu()
+               )
+               print(f"Mask type: {type(masks)}, Mask shape: {len(masks)}")
+               mask = masks[0]
+               if isinstance(mask, torch.Tensor):
+                   mask = mask.numpy()
+               show_mask(mask, ax=ax)
+               rect = patches.Rectangle(
+                   (box[0], box[1]),
+                   box[2] - box[0],
+                   box[3] - box[1],
+                   linewidth=max(2, min(original_size) / 500),
+                   edgecolor='red',
+                   facecolor='none'
+               )
+               ax.add_patch(rect)
+               plt.text(
+                   box[0], box[1] - base_fontsize,
+                   f'{max_score:.2f}',
+                   color='red',
+                   fontsize=base_fontsize,
+                   fontweight='bold',
+                   bbox=dict(facecolor='white', alpha=0.7, edgecolor='none', pad=2)
+               )
+               plt.text(
+                   box[2] + base_fontsize / 2, box[1],
+                   f'Unexpected (Rating: {surprise_rating}/5)\n{target_label}',
+                   color='red',
+                   fontsize=base_fontsize,
+                   fontweight='bold',
+                   bbox=dict(facecolor='white', alpha=0.7, edgecolor='none', pad=2),
+                   verticalalignment='bottom'
+               )
+       plt.axis('off')
+       print("Saving final image...")
+       try:
+           # Save directly to buffer using savefig
+           buf = io.BytesIO()
+           fig.savefig(buf,
+                      format='png',
+                      dpi=dpi,
+                      bbox_inches='tight',
+                      pad_inches=0)
+           buf.seek(0)
+           # Open as PIL Image
+           output_image = Image.open(buf)
+           # Convert to RGB if needed
+           if output_image.mode != 'RGB':
+               output_image = output_image.convert('RGB')
+           # Resize to original size if needed
+           if output_image.size != original_size:
+               output_image = output_image.resize(original_size, Image.Resampling.LANCZOS)
+           # Save to final buffer
+           final_buf = io.BytesIO()
+           output_image.save(final_buf, format='PNG', dpi=original_dpi)
+           final_buf.seek(0)
+           # Cleanup
+           plt.close(fig)
+           buf.close()
+           return final_buf
+       except Exception as e:
+           print(f"Save error details: {str(e)}")
+           print(f"Figure type: {type(fig)}")
+           print(f"Canvas type: {type(fig.canvas)}")
+           raise
+   except Exception as e:
+       print(f"Process image detection error: {str(e)}")
+       print(f"Error occurred at line {e.__traceback__.tb_lineno}")
+       raise
 def process_and_analyze(image):
     if image is None: