import os
import sys
import tempfile
from pathlib import Path

import torch
from PIL import Image

from .. import MODEL_REPO_ID, logger
from ..utils.base_model import BaseModel

# Make the vendored RoMa and DaD packages under third_party importable
roma_path = Path(__file__).parent / "../../third_party/RoMa"
sys.path.append(str(roma_path))
from romatch.models.model_zoo import roma_model

dad_path = Path(__file__).parent / "../../third_party/dad"
sys.path.append(str(dad_path))
import dad as dad_detector

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class Dad(BaseModel):
    default_conf = {
        "name": "two_view_pipeline",
        "model_name": "roma_outdoor.pth",
        "model_utils_name": "dinov2_vitl14_pretrain.pth",
        "max_keypoints": 3000,
        "coarse_res": (560, 560),  # resolution for the coarse RoMa match
        "upsample_res": (864, 1152),  # resolution for upsampled predictions
    }
    required_inputs = [
        "image0",
        "image1",
    ]

    # Initialize the DaD detector and the RoMa matcher
    def _init(self, conf):
        model_path = self._download_model(
            repo_id=MODEL_REPO_ID,
            filename="{}/{}".format("roma", self.conf["model_name"]),
        )

        dinov2_weights = self._download_model(
            repo_id=MODEL_REPO_ID,
            filename="{}/{}".format("roma", self.conf["model_utils_name"]),
        )

        logger.info("Loading Dad + Roma model")
        # Load the RoMa checkpoint and the DINOv2 backbone weights on CPU
        weights = torch.load(model_path, map_location="cpu")
        dinov2_weights = torch.load(dinov2_weights, map_location="cpu")

        # Mixed precision only pays off on GPU; keep full precision on CPU
        if str(device) == "cpu":
            amp_dtype = torch.float32
        else:
            amp_dtype = torch.float16

        self.matcher = roma_model(
            resolution=self.conf["coarse_res"],
            upsample_preds=True,
            weights=weights,
            dinov2_weights=dinov2_weights,
            device=device,
            amp_dtype=amp_dtype,
        )
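        # Upsample coarse predictions to the target resolution; match one direction only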
        self.matcher.upsample_res = self.conf["upsample_res"]
        self.matcher.symmetric = False

        self.detector = dad_detector.load_DaD()
        logger.info("Load Dad + Roma model done.")

    def _forward(self, data):
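        # Recover 8-bit RGB PIL images from the input float tensors in [0, 1]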
        img0 = data["image0"].cpu().numpy().squeeze() * 255
        img1 = data["image1"].cpu().numpy().squeeze() * 255
        img0 = img0.transpose(1, 2, 0)
        img1 = img1.transpose(1, 2, 0)
        img0 = Image.fromarray(img0.astype("uint8"))
        img1 = Image.fromarray(img1.astype("uint8"))
        W_A, H_A = img0.size
        W_B, H_B = img1.size

        # Hack: RoMa and DaD consume image paths, so write the tensors to
        # temporary PNG files first (removed after matching and detection)
        with (
            tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_img0,
            tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_img1,
        ):
            img0_path = temp_img0.name
            img1_path = temp_img1.name
            img0.save(img0_path)
            img1.save(img1_path)

        # Dense matching with RoMa: estimate a warp and a certainty map
        warp, certainty = self.matcher.match(img0_path, img1_path, device=device)
        # Detect keypoints in both images with DaD
        keypoints_A = self.detector.detect_from_path(
            img0_path,
            num_keypoints=self.conf["max_keypoints"],
        )["keypoints"][0]
        keypoints_B = self.detector.detect_from_path(
            img1_path,
            num_keypoints=self.conf["max_keypoints"],
        )["keypoints"][0]
        matches = self.matcher.match_keypoints(
            keypoints_A,
            keypoints_B,
            warp,
            certainty,
            return_tuple=False,
        )

        # Clean up the temporary images now that matching and detection are done
        os.unlink(img0_path)
        os.unlink(img1_path)

        # Convert matches to pixel coordinates and undo the detector's top-left offset
        kpts1, kpts2 = self.matcher.to_pixel_coordinates(matches, H_A, W_A, H_B, W_B)
        offset = self.detector.topleft
        kpts1, kpts2 = kpts1 - offset, kpts2 - offset
        pred = {
            "keypoints0": self.matcher._to_pixel_coordinates(keypoints_A, H_A, W_A),
            "keypoints1": self.matcher._to_pixel_coordinates(keypoints_B, H_B, W_B),
            "mkeypoints0": kpts1,
            "mkeypoints1": kpts2,
            "mconf": torch.ones_like(kpts1[:, 0]),
        }
        return pred
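

# A minimal usage sketch (illustrative; assumes the hloc-style BaseModel API
# where the instance is constructed from a conf dict and called directly with
# the input dict, and that images are float tensors in [0, 1] of shape
# (1, 3, H, W)):
#
#     model = Dad({"max_keypoints": 2000})
#     data = {
#         "image0": torch.rand(1, 3, 480, 640),
#         "image1": torch.rand(1, 3, 480, 640),
#     }
#     pred = model(data)  # keypoints0/1, mkeypoints0/1, mconf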