Spaces:

Xenova
/

segment-anything-webgpu

Running

App Files Files Community

Xenova HF Staff commited on Feb 7

Commit

ece4a6d

verified ·

1 Parent(s): f0c221e

Create index.js

Browse files

Files changed (1) hide show

index.js +301 -0

index.js ADDED Viewed

	@@ -0,0 +1,301 @@

+import {
+  SamModel,
+  AutoProcessor,
+  RawImage,
+  Tensor,
+} from "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]";
+// Reference the elements we will use
+const statusLabel = document.getElementById("status");
+const fileUpload = document.getElementById("upload");
+const imageContainer = document.getElementById("container");
+const example = document.getElementById("example");
+const uploadButton = document.getElementById("upload-button");
+const resetButton = document.getElementById("reset-image");
+const clearButton = document.getElementById("clear-points");
+const cutButton = document.getElementById("cut-mask");
+const starIcon = document.getElementById("star-icon");
+const crossIcon = document.getElementById("cross-icon");
+const maskCanvas = document.getElementById("mask-output");
+const maskContext = maskCanvas.getContext("2d");
+const EXAMPLE_URL =
+  "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/corgi.jpg";
+// State variables
+let isEncoding = false;
+let isDecoding = false;
+let decodePending = false;
+let lastPoints = null;
+let isMultiMaskMode = false;
+let imageInput = null;
+let imageProcessed = null;
+let imageEmbeddings = null;
+async function decode() {
+  // Only proceed if we are not already decoding
+  if (isDecoding) {
+    decodePending = true;
+    return;
+  }
+  isDecoding = true;
+  // Prepare inputs for decoding
+  const reshaped = imageProcessed.reshaped_input_sizes[0];
+  const points = lastPoints
+    .map((x) => [x.position[0] * reshaped[1], x.position[1] * reshaped[0]])
+    .flat(Infinity);
+  const labels = lastPoints.map((x) => BigInt(x.label)).flat(Infinity);
+  const num_points = lastPoints.length;
+  const input_points = new Tensor("float32", points, [1, 1, num_points, 2]);
+  const input_labels = new Tensor("int64", labels, [1, 1, num_points]);
+  // Generate the mask
+  const { pred_masks, iou_scores } = await model({
+    ...imageEmbeddings,
+    input_points,
+    input_labels,
+  });
+  // Post-process the mask
+  const masks = await processor.post_process_masks(
+    pred_masks,
+    imageProcessed.original_sizes,
+    imageProcessed.reshaped_input_sizes,
+  );
+  isDecoding = false;
+  updateMaskOverlay(RawImage.fromTensor(masks[0][0]), iou_scores.data);
+  // Check if another decode is pending
+  if (decodePending) {
+    decodePending = false;
+    decode();
+  }
+}
+function updateMaskOverlay(mask, scores) {
+  // Update canvas dimensions (if different)
+  if (maskCanvas.width !== mask.width || maskCanvas.height !== mask.height) {
+    maskCanvas.width = mask.width;
+    maskCanvas.height = mask.height;
+  }
+  // Allocate buffer for pixel data
+  const imageData = maskContext.createImageData(
+    maskCanvas.width,
+    maskCanvas.height,
+  );
+  // Select best mask
+  const numMasks = scores.length; // 3
+  let bestIndex = 0;
+  for (let i = 1; i < numMasks; ++i) {
+    if (scores[i] > scores[bestIndex]) {
+      bestIndex = i;
+    }
+  }
+  statusLabel.textContent = `Segment score: ${scores[bestIndex].toFixed(2)}`;
+  // Fill mask with colour
+  const pixelData = imageData.data;
+  for (let i = 0; i < pixelData.length; ++i) {
+    if (mask.data[numMasks * i + bestIndex] === 1) {
+      const offset = 4 * i;
+      pixelData[offset] = 0; // red
+      pixelData[offset + 1] = 114; // green
+      pixelData[offset + 2] = 189; // blue
+      pixelData[offset + 3] = 255; // alpha
+    }
+  }
+  // Draw image data to context
+  maskContext.putImageData(imageData, 0, 0);
+}
+function clearPointsAndMask() {
+  // Reset state
+  isMultiMaskMode = false;
+  lastPoints = null;
+  // Remove points from previous mask (if any)
+  document.querySelectorAll(".icon").forEach((e) => e.remove());
+  // Disable cut button
+  cutButton.disabled = true;
+  // Reset mask canvas
+  maskContext.clearRect(0, 0, maskCanvas.width, maskCanvas.height);
+}
+clearButton.addEventListener("click", clearPointsAndMask);
+resetButton.addEventListener("click", () => {
+  // Reset the state
+  imageInput = null;
+  imageProcessed = null;
+  imageEmbeddings = null;
+  isEncoding = false;
+  isDecoding = false;
+  // Clear points and mask (if present)
+  clearPointsAndMask();
+  // Update UI
+  cutButton.disabled = true;
+  imageContainer.style.backgroundImage = "none";
+  uploadButton.style.display = "flex";
+  statusLabel.textContent = "Ready";
+});
+async function encode(url) {
+  if (isEncoding) return;
+  isEncoding = true;
+  statusLabel.textContent = "Extracting image embedding...";
+  imageInput = await RawImage.fromURL(url);
+  // Update UI
+  imageContainer.style.backgroundImage = `url(${url})`;
+  uploadButton.style.display = "none";
+  cutButton.disabled = true;
+  // Recompute image embeddings
+  imageProcessed = await processor(imageInput);
+  imageEmbeddings = await model.get_image_embeddings(imageProcessed);
+  statusLabel.textContent = "Embedding extracted!";
+  isEncoding = false;
+}
+// Handle file selection
+fileUpload.addEventListener("change", function (e) {
+  const file = e.target.files[0];
+  if (!file) return;
+  const reader = new FileReader();
+  // Set up a callback when the file is loaded
+  reader.onload = (e2) => encode(e2.target.result);
+  reader.readAsDataURL(file);
+});
+example.addEventListener("click", (e) => {
+  e.preventDefault();
+  encode(EXAMPLE_URL);
+});
+// Attach hover event to image container
+imageContainer.addEventListener("mousedown", (e) => {
+  if (e.button !== 0 && e.button !== 2) {
+    return; // Ignore other buttons
+  }
+  if (!imageEmbeddings) {
+    return; // Ignore if not encoded yet
+  }
+  if (!isMultiMaskMode) {
+    lastPoints = [];
+    isMultiMaskMode = true;
+    cutButton.disabled = false;
+  }
+  const point = getPoint(e);
+  lastPoints.push(point);
+  // add icon
+  const icon = (point.label === 1 ? starIcon : crossIcon).cloneNode();
+  icon.style.left = `${point.position[0] * 100}%`;
+  icon.style.top = `${point.position[1] * 100}%`;
+  imageContainer.appendChild(icon);
+  // Run decode
+  decode();
+});
+// Clamp a value inside a range [min, max]
+function clamp(x, min = 0, max = 1) {
+  return Math.max(Math.min(x, max), min);
+}
+function getPoint(e) {
+  // Get bounding box
+  const bb = imageContainer.getBoundingClientRect();
+  // Get the mouse coordinates relative to the container
+  const mouseX = clamp((e.clientX - bb.left) / bb.width);
+  const mouseY = clamp((e.clientY - bb.top) / bb.height);
+  return {
+    position: [mouseX, mouseY],
+    label:
+      e.button === 2 // right click
+        ? 0 // negative prompt
+        : 1, // positive prompt
+  };
+}
+// Do not show context menu on right click
+imageContainer.addEventListener("contextmenu", (e) => e.preventDefault());
+// Attach hover event to image container
+imageContainer.addEventListener("mousemove", (e) => {
+  if (!imageEmbeddings || isMultiMaskMode) {
+    // Ignore mousemove events if the image is not encoded yet,
+    // or we are in multi-mask mode
+    return;
+  }
+  lastPoints = [getPoint(e)];
+  decode();
+});
+// Handle cut button click
+cutButton.addEventListener("click", async () => {
+  const [w, h] = [maskCanvas.width, maskCanvas.height];
+  // Get the mask pixel data (and use this as a buffer)
+  const maskImageData = maskContext.getImageData(0, 0, w, h);
+  // Create a new canvas to hold the cut-out
+  const cutCanvas = new OffscreenCanvas(w, h);
+  const cutContext = cutCanvas.getContext("2d");
+  // Copy the image pixel data to the cut canvas
+  const maskPixelData = maskImageData.data;
+  const imagePixelData = imageInput.data;
+  for (let i = 0; i < w * h; ++i) {
+    const sourceOffset = 3 * i; // RGB
+    const targetOffset = 4 * i; // RGBA
+    if (maskPixelData[targetOffset + 3] > 0) {
+      // Only copy opaque pixels
+      for (let j = 0; j < 3; ++j) {
+        maskPixelData[targetOffset + j] = imagePixelData[sourceOffset + j];
+      }
+    }
+  }
+  cutContext.putImageData(maskImageData, 0, 0);
+  // Download image
+  const link = document.createElement("a");
+  link.download = "image.png";
+  link.href = URL.createObjectURL(await cutCanvas.convertToBlob());
+  link.click();
+  link.remove();
+});
+const model_id = "Xenova/slimsam-77-uniform";
+statusLabel.textContent = "Loading model...";
+const model = await SamModel.from_pretrained(model_id, {
+  dtype: "fp16", // or "fp32"
+  device: "webgpu",
+});
+const processor = await AutoProcessor.from_pretrained(model_id);
+statusLabel.textContent = "Ready";
+// Enable the user interface
+fileUpload.disabled = false;
+uploadButton.style.opacity = 1;
+example.style.pointerEvents = "auto";