Add custom handler for SAM2

Files changed (4) hide show

handler.py +36 -30
images/20250121_gauge_0001.jpg +0 -0
requirements.txt +4 -0
test_endpoint.py +66 -0

handler.py CHANGED Viewed

@@ -1,41 +1,47 @@
 from typing import Dict, List, Any
-from sam2.sam2_image_predictor import SAM2ImagePredictor
 import torch
-import numpy as np
-from PIL import Image
-import io
 class EndpointHandler:
     def __init__(self, path=""):
-        # Initialize SAM2 predictor with small model
-        self.predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2-hiera-small")
-    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
         """
         Args:
-            data: Dictionary with "inputs" key containing image bytes
         Returns:
-            Dictionary containing masks and scores
         """
-        # Get input image
-        if "inputs" not in data:
-            raise ValueError("No inputs provided")
-        # Convert input image bytes to PIL Image
-        image = Image.open(io.BytesIO(data["inputs"]))
-        image = np.array(image)
-        # Process with SAM2
-        with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
-            self.predictor.set_image(image)
-            masks, scores, _ = self.predictor.predict()
-        # Convert masks to lists for JSON serialization
-        if masks is not None:
-            masks = [mask.tolist() for mask in masks]
-            scores = scores.tolist() if scores is not None else None
-        return {
-            "masks": masks,
-            "scores": scores
-        }

 from typing import Dict, List, Any
+from transformers import SamModel, SamProcessor
 import torch
 class EndpointHandler:
     def __init__(self, path=""):
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.model = SamModel.from_pretrained(path).to(self.device)
+        self.processor = SamProcessor.from_pretrained(path)
+    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
+        Handle image segmentation requests
         Args:
+            data: Dictionary containing:
+                inputs: Raw image bytes
         Returns:
+            List of dictionaries containing segmentation masks
         """
+        # Get raw image bytes from the request
+        raw_image = data.pop("inputs", data)
+        # Process the image
+        inputs = self.processor(raw_image, return_tensors="pt").to(self.device)
+        # Generate image embeddings
+        image_embeddings = self.model.get_image_embeddings(inputs["pixel_values"])
+        # Generate masks
+        outputs = self.model.generate(
+            image_embeddings=image_embeddings,
+            return_dict=True
+        )
+        # Process outputs
+        masks = outputs.pred_masks.squeeze().cpu().numpy()
+        scores = outputs.iou_scores.squeeze().cpu().numpy()
+        # Format response
+        results = []
+        for mask, score in zip(masks, scores):
+            results.append({
+                "mask": mask.tolist(),  # Convert numpy array to list for JSON serialization
+                "score": float(score)
+            })
+        return results

images/20250121_gauge_0001.jpg ADDED Viewed

requirements.txt CHANGED Viewed

	@@ -1 +1,5 @@
1	sam2

 sam2
+transformers
+torch
+pillow
+numpy

test_endpoint.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import requests
+from pathlib import Path
+from PIL import Image
+import io
+def get_stored_token():
+    """Get the stored HuggingFace token"""
+    token_path = Path.home() / '.cache/huggingface/token'
+    if token_path.exists():
+        with open(token_path, 'r') as f:
+            return f.read().strip()
+    return None
+# URL of the deployed inference endpoint that this test script posts to.
+API_URL = "https://c3g262qlc7cizj5n.us-east4.gcp.endpoints.huggingface.cloud"
+# Token looked up once at import time; None when no token file is present.
+token = get_stored_token()
+def query(image_path):
+    # Read image bytes directly
+    with open(image_path, "rb") as f:
+        image_bytes = f.read()
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "image/jpeg"
+    }
+    # Print some debug info
+    print(f"Sending file: {image_path}")
+    print(f"Content-Type: {headers['Content-Type']}")
+    print(f"Image size: {len(image_bytes)} bytes")
+    response = requests.post(
+        API_URL,
+        headers=headers,
+        data=image_bytes,  # Send raw bytes
+        verify=True
+    )
+    # Add error handling
+    if response.status_code != 200:
+        print(f"Response headers: {response.headers}")
+        print(f"Request headers sent: {response.request.headers}")
+        return f"Error: {response.status_code}, {response.text}"
+    try:
+        return response.json()
+    except requests.exceptions.JSONDecodeError:
+        return f"Error decoding JSON. Raw response: {response.text}"
+# Test with an image
+if __name__ == "__main__":
+    # Option 1: Test with specific image
+    image_path = Path("images/20250121_gauge_0001.jpg")
+    # Option 2: Test with first image found in directory
+    # TRAIN_IMAGES_DIR = Path("images")
+    # image_path = next(TRAIN_IMAGES_DIR.glob('*.jpg'))
+    if not image_path.exists():
+        print(f"Error: Image not found at {image_path}")
+        exit(1)
+    print(f"Testing with image: {image_path}")
+    result = query(image_path)
+    print("\nAPI Response:")
+    print(result)