|
import { SamModel, AutoProcessor, RawImage, Tensor } from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]';
|
|
|
|
|
|
// Page elements this script reads from or updates, each looked up once by id.
const byId = (id) => document.getElementById(id);

const statusLabel = byId('status');
const fileUpload = byId('upload');
const imageContainer = byId('container');
const example = byId('example');
const maskCanvas = byId('mask-output');
const uploadButton = byId('upload-button');
const resetButton = byId('reset-image');
const clearButton = byId('clear-points');
const cutButton = byId('cut-mask');
|
|
|
|
|
|
// ---- Mutable state shared by the handlers in this file ----

// State of the currently loaded image.
// `imageDataURI` is whatever was passed to segment() (also used as the <img> src when cutting).
let imageDataURI = null;
// Output of `processor(image)` for the current image.
let imageInputs = null;
// Output of `model.get_image_embeddings(imageInputs)`; null until the image is ready for prompts.
let imageEmbeddings = null;

// State of the prompt interaction.
// Array of `{ point: [x, y], label }` entries (see getPoint), or null when nothing is pending.
let lastPoints = null;
// True from the first mousedown until the points are cleared; hovering is ignored while set.
let isMultiMaskMode = false;
// True while decode() is waiting on the model.
let isDecoding = false;
|
|
|
|
|
|
// Remote folder holding the demo assets (example photo and marker icons).
const BASE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/';

// Image segmented when the "example" link is clicked.
const EXAMPLE_URL = `${BASE_URL}corgi.jpg`;
|
|
|
|
|
|
// Marker templates; addIcon() clones one of these for every prompt point.
// The `icon` class is what clearPointsAndMask() selects on to remove them again.
const star = Object.assign(new Image(), {
  src: `${BASE_URL}star-icon.png`,
  className: 'icon',
});

const cross = Object.assign(new Image(), {
  src: `${BASE_URL}cross-icon.png`,
  className: 'icon',
});
|
|
|
|
/**
 * Decodes a mask for the current prompt points and paints it on the overlay.
 *
 * Reads the module state `imageInputs`, `imageEmbeddings` and `lastPoints`,
 * calls the model with the cached embeddings plus the prompt points, and
 * hands the post-processed mask to updateMaskOverlay().
 *
 * Does nothing when no image has been embedded yet or there are no points.
 * `isDecoding` is true for the duration of the model call and is always reset,
 * even when the call fails (the error itself is still propagated).
 *
 * @returns {Promise<void>}
 */
async function decode() {
  if (!imageInputs || !imageEmbeddings || !lastPoints?.length) {
    return;
  }

  // Snapshot the image state: a reset or a new upload may replace the globals
  // while we are awaiting the model below.
  const inputs = imageInputs;
  const embeddings = imageEmbeddings;

  let mask;
  let scores;

  isDecoding = true;
  try {
    // Points are stored normalised to [0, 1]; x is scaled by reshaped[1] and
    // y by reshaped[0] to get coordinates in the processor's resized image.
    const reshaped = inputs.reshaped_input_sizes[0];
    const points = lastPoints.map(({ point }) => [point[0] * reshaped[1], point[1] * reshaped[0]]);
    const labels = lastPoints.map(({ label }) => BigInt(label));

    const input_points = new Tensor('float32', points.flat(Infinity), [1, 1, points.length, 2]);
    const input_labels = new Tensor('int64', labels.flat(Infinity), [1, 1, labels.length]);

    const { pred_masks, iou_scores } = await model({
      ...embeddings,
      input_points,
      input_labels,
    });

    const masks = await processor.post_process_masks(
      pred_masks,
      inputs.original_sizes,
      inputs.reshaped_input_sizes,
    );

    mask = RawImage.fromTensor(masks[0][0]);
    scores = iou_scores.data;
  } finally {
    // Without this, one failed inference would leave hover decoding blocked forever.
    isDecoding = false;
  }

  // Hover mode: a point recorded while we were busy still needs decoding.
  // The nested call reads `lastPoints` synchronously (before its first await),
  // so clearing it right afterwards is safe.
  if (!isMultiMaskMode && lastPoints) {
    decode().catch((err) => console.error(err));
    lastPoints = null;
  }

  // The image was reset or replaced while decoding; this mask is stale.
  if (imageInputs !== inputs) {
    return;
  }

  updateMaskOverlay(mask, scores);
}

/**
 * Paints the highest-scoring mask onto `maskCanvas` and reports its score.
 *
 * @param {{ width: number, height: number, data: ArrayLike<number> }} mask
 *   Mask image; indexed as `scores.length` interleaved values per pixel.
 * @param {ArrayLike<number>} scores One score per candidate mask.
 */
function updateMaskOverlay(mask, scores) {
  // Resizing a canvas clears it, so only touch the size when it really changed.
  if (maskCanvas.width !== mask.width || maskCanvas.height !== mask.height) {
    maskCanvas.width = mask.width;
    maskCanvas.height = mask.height;
  }

  const context = maskCanvas.getContext('2d');
  const imageData = context.createImageData(maskCanvas.width, maskCanvas.height);

  // Pick the candidate with the highest score.
  const numMasks = scores.length;
  let bestIndex = 0;
  for (let i = 1; i < numMasks; ++i) {
    if (scores[i] > scores[bestIndex]) {
      bestIndex = i;
    }
  }
  statusLabel.textContent = `Segment score: ${scores[bestIndex].toFixed(2)}`;

  // Colour every pixel selected by the best mask; everything else stays transparent.
  const pixelData = imageData.data;
  const numPixels = pixelData.length / 4; // pixelData holds 4 bytes (RGBA) per pixel
  for (let i = 0; i < numPixels; ++i) {
    if (mask.data[numMasks * i + bestIndex] === 1) {
      const offset = 4 * i;
      pixelData[offset] = 0;
      pixelData[offset + 1] = 114;
      pixelData[offset + 2] = 189;
      pixelData[offset + 3] = 255;
    }
  }

  context.putImageData(imageData, 0, 0);
}
|
|
|
|
/**
 * Returns the prompt UI to its "no points" state: leaves multi-point mode,
 * forgets pending points, removes every marker icon, disables the cut button
 * and wipes the mask overlay. The loaded image itself is left untouched.
 */
function clearPointsAndMask() {
  // Prompt state.
  lastPoints = null;
  isMultiMaskMode = false;

  // Marker icons all carry the `icon` class.
  for (const marker of document.querySelectorAll('.icon')) {
    marker.remove();
  }

  // Nothing left to cut out.
  cutButton.disabled = true;

  // Erase the whole overlay canvas.
  const { width, height } = maskCanvas;
  maskCanvas.getContext('2d').clearRect(0, 0, width, height);
}
|
|
// "Clear points": drop the prompt markers and mask but keep the current image.
clearButton.addEventListener('click', clearPointsAndMask);

// "Reset image": return the page to its initial, no-image state.
resetButton.addEventListener('click', () => {
  // Forget the current image and everything derived from it.
  imageDataURI = null;
  imageInputs = null;
  imageEmbeddings = null;
  isDecoding = false;

  // Empty the file input; otherwise picking the same file again after a
  // reset would not fire a `change` event and nothing would happen.
  fileUpload.value = '';

  // Removes points, markers and the mask, and disables the cut button.
  clearPointsAndMask();

  // Show the upload prompt again.
  imageContainer.style.backgroundImage = 'none';
  uploadButton.style.display = 'flex';
  statusLabel.textContent = 'Ready';
});
|
|
|
|
/**
 * Displays an image and computes its embedding so it can be prompted.
 *
 * On success `imageInputs` and `imageEmbeddings` are populated and the status
 * reads "Embedding extracted!". If the image is reset or replaced while the
 * embedding is being computed, the outdated result is dropped. On failure the
 * upload prompt is restored, the status reports the failure, and the error is
 * rethrown to the caller.
 *
 * @param {string} data Image source: a data URI or a URL.
 * @returns {Promise<void>}
 */
async function segment(data) {
  statusLabel.textContent = 'Extracting image embedding...';

  // Invalidate the previous embedding first so the mouse handlers stay idle
  // until the new one is ready.
  imageEmbeddings = null;
  imageDataURI = data;

  // Show the image and hide the upload prompt.
  imageContainer.style.backgroundImage = `url(${data})`;
  uploadButton.style.display = 'none';
  cutButton.disabled = true;

  try {
    const image = await RawImage.read(data);
    const inputs = await processor(image);
    const embeddings = await model.get_image_embeddings(inputs);

    // `imageDataURI` doubles as a request token: if it no longer matches, a
    // reset or a newer segment() call happened while we were waiting.
    if (imageDataURI !== data) {
      return;
    }

    imageInputs = inputs;
    imageEmbeddings = embeddings;
    statusLabel.textContent = 'Embedding extracted!';
  } catch (err) {
    // Only roll the UI back if this request is still the current one.
    if (imageDataURI === data) {
      imageDataURI = null;
      imageContainer.style.backgroundImage = 'none';
      uploadButton.style.display = 'flex';
      statusLabel.textContent = 'Failed to process image';
    }
    throw err;
  }
}
|
|
|
|
|
|
// When the user picks a file, read it as a data URL and segment it.
fileUpload.addEventListener('change', (event) => {
  const selected = event.target.files[0];
  if (selected) {
    const reader = new FileReader();

    // Once the file is in memory, hand its data URL to segment().
    reader.onload = (loadEvent) => segment(loadEvent.target.result);

    reader.readAsDataURL(selected);
  }
});
|
|
|
|
// The "example" link segments the bundled sample image instead of navigating.
example.addEventListener('click', (event) => {
  event.preventDefault();
  segment(EXAMPLE_URL);
});
|
|
|
|
/**
 * Places a marker icon on the image for one prompt point.
 *
 * @param {{ point: number[], label: number }} prompt
 *   `point` is `[x, y]` as fractions of the container size; label 1 gets a
 *   star, any other label gets a cross.
 */
function addIcon({ point, label }) {
  const template = label === 1 ? star : cross;
  const marker = template.cloneNode();

  // Position the marker with percentages derived from the fractional point.
  const leftPercent = point[0] * 100;
  const topPercent = point[1] * 100;
  marker.style.left = `${leftPercent}%`;
  marker.style.top = `${topPercent}%`;

  imageContainer.appendChild(marker);
}
|
|
|
|
|
|
// Clicking adds a prompt point: button 0 and button 2 are accepted, and
// getPoint() gives button 2 the label 0 and everything else the label 1.
imageContainer.addEventListener('mousedown', (event) => {
  const isAcceptedButton = event.button === 0 || event.button === 2;
  if (!isAcceptedButton || !imageEmbeddings) {
    // Other buttons are ignored, as are clicks before an image is ready.
    return;
  }

  // The first click switches from hover preview to multi-point mode.
  if (!isMultiMaskMode) {
    isMultiMaskMode = true;
    lastPoints = [];
    cutButton.disabled = false;
  }

  const prompt = getPoint(event);
  lastPoints.push(prompt);

  // Show the marker, then refresh the mask for the accumulated points.
  addIcon(prompt);
  decode();
});
|
|
|
|
|
|
|
|
/**
 * Restricts a number to the range [min, max].
 *
 * @param {number} x Value to restrict.
 * @param {number} [min=0] Lower bound.
 * @param {number} [max=1] Upper bound.
 * @returns {number} `x` capped at `max`, then raised to at least `min`.
 */
function clamp(x, min = 0, max = 1) {
  const capped = Math.min(x, max);
  return Math.max(capped, min);
}
|
|
|
|
/**
 * Converts a mouse event into a prompt point relative to the image container.
 *
 * @param {MouseEvent} e Event carrying `clientX`, `clientY` and `button`.
 * @returns {{ point: number[], label: number }}
 *   `point` is `[x, y]` as fractions of the container's width/height, clamped
 *   to [0, 1]; `label` is 0 for button 2 and 1 for any other button.
 */
function getPoint(e) {
  // Container position and size in viewport coordinates.
  const { left, top, width, height } = imageContainer.getBoundingClientRect();

  // Mouse position as a fraction of the container.
  const x = clamp((e.clientX - left) / width);
  const y = clamp((e.clientY - top) / height);

  const label = e.button === 2 ? 0 : 1;

  return { point: [x, y], label };
}
|
|
|
|
|
|
// Suppress the browser context menu on the image (button 2 is handled by the mousedown listener).
imageContainer.addEventListener('contextmenu', (event) => {
  event.preventDefault();
});
|
|
|
|
|
|
// Hover preview: while no point has been clicked, decode the mask under the cursor.
imageContainer.addEventListener('mousemove', (event) => {
  const isHoverPreview = imageEmbeddings && !isMultiMaskMode;
  if (!isHoverPreview) {
    // Either no image is ready yet or the user is placing points by clicking.
    return;
  }

  // Always remember the latest position ...
  lastPoints = [getPoint(event)];

  // ... but only start a decode if one is not already running.
  if (isDecoding) {
    return;
  }
  decode();
});
|
|
|
|
|
|
// "Cut mask": download the part of the image covered by the current mask as
// `image.png`, with everything outside the mask left transparent.
cutButton.addEventListener('click', () => {
  const { width: w, height: h } = maskCanvas;

  // Capture the mask pixels now; the image below loads asynchronously.
  const maskPixelData = maskCanvas.getContext('2d').getImageData(0, 0, w, h);

  const image = new Image();
  image.crossOrigin = 'anonymous';
  image.onload = async () => {
    // Draw the source image at the mask's resolution and read its pixels.
    const imageCanvas = new OffscreenCanvas(w, h);
    const imageContext = imageCanvas.getContext('2d');
    imageContext.drawImage(image, 0, 0, w, h);
    const imagePixelData = imageContext.getImageData(0, 0, w, h);

    // Start the cut-out from a fresh (fully transparent) canvas.
    const cutCanvas = new OffscreenCanvas(w, h);
    const cutContext = cutCanvas.getContext('2d');
    const cutPixelData = cutContext.getImageData(0, 0, w, h);

    // Wherever the mask has any alpha, copy that pixel's RGBA from the image.
    for (let i = 3; i < maskPixelData.data.length; i += 4) {
      if (maskPixelData.data[i] > 0) {
        for (let j = 0; j < 4; ++j) {
          const offset = i - j;
          cutPixelData.data[offset] = imagePixelData.data[offset];
        }
      }
    }
    cutContext.putImageData(cutPixelData, 0, 0);

    // Trigger the download through a temporary link.
    const blobUrl = URL.createObjectURL(await cutCanvas.convertToBlob());
    const link = document.createElement('a');
    link.download = 'image.png';
    link.href = blobUrl;
    link.click();
    link.remove();

    // Release the blob once the download has had time to start; an object
    // URL otherwise stays alive until the page is unloaded.
    const revokeDelayMs = 10000;
    setTimeout(() => URL.revokeObjectURL(blobUrl), revokeDelayMs);
  };
  image.src = imageDataURI;
});
|
|
|
|
|
|
// ---- Start-up: load the model and processor, then unlock the upload UI ----
const model_id = 'Xenova/slimsam-77-uniform';
statusLabel.textContent = 'Loading model...';

/**
 * Loads the model and its processor in parallel (the two loads do not depend
 * on each other). On failure the status label reports it instead of staying
 * on "Loading model...", and the error is rethrown.
 *
 * @param {string} id Model id passed to both `from_pretrained` calls.
 * @returns {Promise<Array>} `[model, processor]`.
 */
async function loadModelAndProcessor(id) {
  try {
    return await Promise.all([
      SamModel.from_pretrained(id, {
        dtype: 'fp16',
        device: 'webgpu',
      }),
      AutoProcessor.from_pretrained(id),
    ]);
  } catch (err) {
    statusLabel.textContent = 'Failed to load model';
    throw err;
  }
}

const [model, processor] = await loadModelAndProcessor(model_id);
statusLabel.textContent = 'Ready';

// Everything is loaded: allow uploads and the example link.
fileUpload.disabled = false;
uploadButton.style.opacity = 1;
example.style.pointerEvents = 'auto';
|
|
|