Realcat committed · Commit 89c9b15 · 1 Parent(s): bd20887

add: dad detector with roma matcher

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. README.md +1 -0
  2. config/config.yaml +23 -0
  3. imcui/hloc/match_dense.py +48 -5
  4. imcui/hloc/matchers/dad_roma.py +121 -0
  5. imcui/hloc/matchers/roma.py +10 -4
  6. imcui/hloc/matchers/xfeat_dense.py +4 -2
  7. imcui/hloc/matchers/xfeat_lightglue.py +4 -2
  8. imcui/third_party/RoMa/.gitignore +11 -0
  9. imcui/third_party/RoMa/LICENSE +21 -0
  10. imcui/third_party/RoMa/README.md +123 -0
  11. imcui/third_party/RoMa/data/.gitignore +2 -0
  12. imcui/third_party/RoMa/requirements.txt +14 -0
  13. imcui/third_party/RoMa/romatch/models/matcher.py +68 -32
  14. imcui/third_party/RoMa/romatch/models/transformer/layers/attention.py +1 -1
  15. imcui/third_party/RoMa/romatch/models/transformer/layers/block.py +1 -1
  16. imcui/third_party/RoMa/romatch/utils/utils.py +9 -1
  17. imcui/third_party/RoMa/setup.py +1 -1
  18. imcui/third_party/dad/.gitignore +170 -0
  19. imcui/third_party/dad/.python-version +1 -0
  20. imcui/third_party/dad/LICENSE +21 -0
  21. imcui/third_party/dad/README.md +130 -0
  22. imcui/third_party/dad/dad/__init__.py +17 -0
  23. imcui/third_party/dad/dad/augs.py +214 -0
  24. imcui/third_party/dad/dad/benchmarks/__init__.py +21 -0
  25. imcui/third_party/dad/dad/benchmarks/hpatches.py +117 -0
  26. imcui/third_party/dad/dad/benchmarks/megadepth.py +219 -0
  27. imcui/third_party/dad/dad/benchmarks/num_inliers.py +106 -0
  28. imcui/third_party/dad/dad/benchmarks/scannet.py +163 -0
  29. imcui/third_party/dad/dad/checkpoint.py +61 -0
  30. imcui/third_party/dad/dad/datasets/__init__.py +0 -0
  31. imcui/third_party/dad/dad/datasets/megadepth.py +312 -0
  32. imcui/third_party/dad/dad/detectors/__init__.py +50 -0
  33. imcui/third_party/dad/dad/detectors/dedode_detector.py +559 -0
  34. imcui/third_party/dad/dad/detectors/third_party/__init__.py +11 -0
  35. imcui/third_party/dad/dad/detectors/third_party/harrisaff.py +35 -0
  36. imcui/third_party/dad/dad/detectors/third_party/hesaff.py +40 -0
  37. imcui/third_party/dad/dad/detectors/third_party/lightglue/__init__.py +9 -0
  38. imcui/third_party/dad/dad/detectors/third_party/lightglue/aliked.py +770 -0
  39. imcui/third_party/dad/dad/detectors/third_party/lightglue/disk.py +48 -0
  40. imcui/third_party/dad/dad/detectors/third_party/lightglue/dog_hardnet.py +41 -0
  41. imcui/third_party/dad/dad/detectors/third_party/lightglue/lightglue.py +655 -0
  42. imcui/third_party/dad/dad/detectors/third_party/lightglue/sift.py +216 -0
  43. imcui/third_party/dad/dad/detectors/third_party/lightglue/superpoint.py +233 -0
  44. imcui/third_party/dad/dad/detectors/third_party/lightglue/utils.py +158 -0
  45. imcui/third_party/dad/dad/detectors/third_party/lightglue_detector.py +42 -0
  46. imcui/third_party/dad/dad/detectors/third_party/rekd/config.py +206 -0
  47. imcui/third_party/dad/dad/detectors/third_party/rekd/geometry_tools.py +204 -0
  48. imcui/third_party/dad/dad/detectors/third_party/rekd/model/REKD.py +234 -0
  49. imcui/third_party/dad/dad/detectors/third_party/rekd/model/kernels.py +118 -0
  50. imcui/third_party/dad/dad/detectors/third_party/rekd/model/load_models.py +25 -0
README.md CHANGED
@@ -44,6 +44,7 @@ The tool currently supports various popular image matching algorithms, namely:
 
  | Algorithm | Supported | Conference/Journal | Year | GitHub Link |
  |------------------|-----------|--------------------|------|-------------|
+ | DaD | ✅ | ARXIV | 2025 | [Link](https://github.com/Parskatt/dad) |
  | MINIMA | ✅ | ARXIV | 2024 | [Link](https://github.com/LSXI7/MINIMA) |
  | XoFTR | ✅ | CVPR | 2024 | [Link](https://github.com/OnderT/XoFTR) |
  | EfficientLoFTR | ✅ | CVPR | 2024 | [Link](https://github.com/zju3dv/EfficientLoFTR) |
config/config.yaml CHANGED
@@ -43,6 +43,17 @@ matcher_zoo:
     # low, medium, high
     efficiency: low
+  dad(RoMa):
+    matcher: dad_roma
+    skip_ci: true
+    dense: true
+    info:
+      name: Dad(RoMa) # display name
+      source: "ARXIV 2025"
+      github: https://github.com/example/example
+      paper: https://arxiv.org/abs/2503.07347
+      display: true
+    efficiency: low # low, medium, high
   minima(loftr):
     matcher: minima_loftr
     dense: true
@@ -50,6 +61,7 @@ matcher_zoo:
       name: MINIMA(LoFTR) # display name
       source: "ARXIV 2024"
       paper: https://arxiv.org/abs/2412.19412
+      github: https://github.com/LSXI7/MINIMA
       display: true
   minima(RoMa):
     matcher: minima_roma
@@ -59,6 +71,7 @@ matcher_zoo:
       name: MINIMA(RoMa) # display name
       source: "ARXIV 2024"
       paper: https://arxiv.org/abs/2412.19412
+      github: https://github.com/LSXI7/MINIMA
       display: false
     efficiency: low # low, medium, high
   omniglue:
@@ -164,6 +177,16 @@ matcher_zoo:
       paper: https://arxiv.org/pdf/2404.09692
       project: null
       display: true
+  jamma:
+    matcher: jamma
+    dense: true
+    info:
+      name: Jamma # display name
+      source: "CVPR 2024"
+      github: https://github.com/OnderT/XoFTR
+      paper: https://arxiv.org/pdf/2404.09692
+      project: null
+      display: false
   cotr:
     enable: false
     skip_ci: true
imcui/hloc/match_dense.py CHANGED
@@ -102,6 +102,23 @@ confs = {
         "max_error": 1,  # max error for assigned keypoints (in px)
         "cell_size": 1,  # size of quantization patch (max 1 kp/patch)
     },
+    "jamma": {
+        "output": "matches-jamma",
+        "model": {
+            "name": "jamma",
+            "weights": "jamma_weight.ckpt",
+            "max_keypoints": 2000,
+            "match_threshold": 0.3,
+        },
+        "preprocessing": {
+            "grayscale": True,
+            "resize_max": 1024,
+            "dfactor": 16,
+            "width": 832,
+            "height": 832,
+            "force_resize": True,
+        },
+    },
     # "loftr_quadtree": {
     #     "output": "matches-loftr-quadtree",
     #     "model": {
@@ -295,7 +312,25 @@ confs = {
         },
         "preprocessing": {
             "grayscale": False,
-            "force_resize": True,
+            "force_resize": False,
+            "resize_max": 1024,
+            "width": 320,
+            "height": 240,
+            "dfactor": 8,
+        },
+    },
+    "dad_roma": {
+        "output": "matches-dad_roma",
+        "model": {
+            "name": "dad_roma",
+            "weights": "outdoor",
+            "model_name": "roma_outdoor.pth",
+            "max_keypoints": 2000,
+            "match_threshold": 0.2,
+        },
+        "preprocessing": {
+            "grayscale": False,
+            "force_resize": False,
             "resize_max": 1024,
             "width": 320,
             "height": 240,
@@ -1010,9 +1045,17 @@ def match_images(model, image_0, image_1, conf, device="cpu"):
     # Rescale keypoints and move to cpu
     if "keypoints0" in pred.keys() and "keypoints1" in pred.keys():
         kpts0, kpts1 = pred["keypoints0"], pred["keypoints1"]
+        mkpts0, mkpts1 = pred.get("mkeypoints0"), pred.get("mkeypoints1")
+        if mkpts0 is None or mkpts1 is None:
+            mkpts0 = kpts0
+            mkpts1 = kpts1
+
         kpts0_origin = scale_keypoints(kpts0 + 0.5, s0) - 0.5
         kpts1_origin = scale_keypoints(kpts1 + 0.5, s1) - 0.5
 
+        mkpts0_origin = scale_keypoints(mkpts0 + 0.5, s0) - 0.5
+        mkpts1_origin = scale_keypoints(mkpts1 + 0.5, s1) - 0.5
+
         ret = {
             "image0": image0.squeeze().cpu().numpy(),
             "image1": image1.squeeze().cpu().numpy(),
@@ -1022,10 +1065,10 @@ def match_images(model, image_0, image_1, conf, device="cpu"):
             "keypoints1": kpts1.cpu().numpy(),
             "keypoints0_orig": kpts0_origin.cpu().numpy(),
             "keypoints1_orig": kpts1_origin.cpu().numpy(),
-            "mkeypoints0": kpts0.cpu().numpy(),
-            "mkeypoints1": kpts1.cpu().numpy(),
-            "mkeypoints0_orig": kpts0_origin.cpu().numpy(),
-            "mkeypoints1_orig": kpts1_origin.cpu().numpy(),
+            "mkeypoints0": mkpts0.cpu().numpy(),
+            "mkeypoints1": mkpts1.cpu().numpy(),
+            "mkeypoints0_orig": mkpts0_origin.cpu().numpy(),
+            "mkeypoints1_orig": mkpts1_origin.cpu().numpy(),
             "original_size0": np.array(image_0.shape[:2][::-1]),
             "original_size1": np.array(image_1.shape[:2][::-1]),
             "new_size0": np.array(image0.shape[-2:][::-1]),
imcui/hloc/matchers/dad_roma.py ADDED
@@ -0,0 +1,121 @@
+ import sys
+ from pathlib import Path
+ import tempfile
+ import torch
+ from PIL import Image
+
+ from .. import MODEL_REPO_ID, logger
+ from ..utils.base_model import BaseModel
+
+ roma_path = Path(__file__).parent / "../../third_party/RoMa"
+ sys.path.append(str(roma_path))
+ from romatch.models.model_zoo import roma_model
+
+ dad_path = Path(__file__).parent / "../../third_party/dad"
+ sys.path.append(str(dad_path))
+ import dad as dad_detector
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+ class Dad(BaseModel):
+     default_conf = {
+         "name": "two_view_pipeline",
+         "model_name": "roma_outdoor.pth",
+         "model_utils_name": "dinov2_vitl14_pretrain.pth",
+         "max_keypoints": 3000,
+         "coarse_res": (560, 560),
+         "upsample_res": (864, 1152),
+     }
+     required_inputs = [
+         "image0",
+         "image1",
+     ]
+
+     # Initialize the DaD detector and the RoMa matcher
+     def _init(self, conf):
+         model_path = self._download_model(
+             repo_id=MODEL_REPO_ID,
+             filename="{}/{}".format("roma", self.conf["model_name"]),
+         )
+
+         dinov2_weights = self._download_model(
+             repo_id=MODEL_REPO_ID,
+             filename="{}/{}".format("roma", self.conf["model_utils_name"]),
+         )
+
+         logger.info("Loading Dad + Roma model")
+         # load the model
+         weights = torch.load(model_path, map_location="cpu")
+         dinov2_weights = torch.load(dinov2_weights, map_location="cpu")
+
+         if str(device) == "cpu":
+             amp_dtype = torch.float32
+         else:
+             amp_dtype = torch.float16
+
+         self.matcher = roma_model(
+             resolution=self.conf["coarse_res"],
+             upsample_preds=True,
+             weights=weights,
+             dinov2_weights=dinov2_weights,
+             device=device,
+             amp_dtype=amp_dtype,
+         )
+         self.matcher.upsample_res = self.conf["upsample_res"]
+         self.matcher.symmetric = False
+
+         self.detector = dad_detector.load_DaD()
+         logger.info("Load Dad + Roma model done.")
+
+     def _forward(self, data):
+         img0 = data["image0"].cpu().numpy().squeeze() * 255
+         img1 = data["image1"].cpu().numpy().squeeze() * 255
+         img0 = img0.transpose(1, 2, 0)
+         img1 = img1.transpose(1, 2, 0)
+         img0 = Image.fromarray(img0.astype("uint8"))
+         img1 = Image.fromarray(img1.astype("uint8"))
+         W_A, H_A = img0.size
+         W_B, H_B = img1.size
+
+         # hack: write the tensors to temporary files so both models can read from disk
+         with (
+             tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_img0,
+             tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_img1,
+         ):
+             img0_path = temp_img0.name
+             img1_path = temp_img1.name
+             img0.save(img0_path)
+             img1.save(img1_path)
+
+         # Match
+         warp, certainty = self.matcher.match(img0_path, img1_path, device=device)
+         # Detect
+         keypoints_A = self.detector.detect_from_path(
+             img0_path,
+             num_keypoints=self.conf["max_keypoints"],
+         )["keypoints"][0]
+         keypoints_B = self.detector.detect_from_path(
+             img1_path,
+             num_keypoints=self.conf["max_keypoints"],
+         )["keypoints"][0]
+         matches = self.matcher.match_keypoints(
+             keypoints_A,
+             keypoints_B,
+             warp,
+             certainty,
+             return_tuple=False,
+         )
+
+         # Sample matches for estimation
+         kpts1, kpts2 = self.matcher.to_pixel_coordinates(matches, H_A, W_A, H_B, W_B)
+         offset = self.detector.topleft - 0
+         kpts1, kpts2 = kpts1 - offset, kpts2 - offset
+         pred = {
+             "keypoints0": self.matcher._to_pixel_coordinates(keypoints_A, H_A, W_A),
+             "keypoints1": self.matcher._to_pixel_coordinates(keypoints_B, H_B, W_B),
+             "mkeypoints0": kpts1,
+             "mkeypoints1": kpts2,
+             "mconf": torch.ones_like(kpts1[:, 0]),
+         }
+         return pred
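For orientation, a hedged sketch of the detect-then-match flow this file wraps: DaD proposes keypoints per image, RoMa predicts a dense warp with certainty, and `match_keypoints` snaps the two keypoint sets onto that warp. Image paths are placeholders; the `dad` and `romatch` calls follow the usage shown in the file above and in the bundled READMEs.

```python
import torch
import dad as dad_detector
from romatch import roma_outdoor

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
detector = dad_detector.load_DaD()
matcher = roma_outdoor(device=device)
matcher.symmetric = False  # match_keypoints expects the one-directional warp

img0_path, img1_path = "im_A.png", "im_B.png"  # placeholder paths

# Dense warp + certainty from RoMa, sparse keypoints from DaD.
warp, certainty = matcher.match(img0_path, img1_path, device=device)
kps_A = detector.detect_from_path(img0_path, num_keypoints=2000)["keypoints"][0]
kps_B = detector.detect_from_path(img1_path, num_keypoints=2000)["keypoints"][0]

# Mutual-nearest assignment of the DaD keypoints under the RoMa warp.
matches = matcher.match_keypoints(kps_A, kps_B, warp, certainty, return_tuple=False)
```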
imcui/hloc/matchers/roma.py CHANGED
@@ -20,6 +20,8 @@ class Roma(BaseModel):
         "model_name": "roma_outdoor.pth",
         "model_utils_name": "dinov2_vitl14_pretrain.pth",
         "max_keypoints": 3000,
+        "coarse_res": (560, 560),
+        "upsample_res": (864, 1152),
     }
     required_inputs = [
         "image0",
@@ -43,15 +45,19 @@
         weights = torch.load(model_path, map_location="cpu")
         dinov2_weights = torch.load(dinov2_weights, map_location="cpu")
 
+        if str(device) == "cpu":
+            amp_dtype = torch.float32
+        else:
+            amp_dtype = torch.float16
         self.net = roma_model(
-            resolution=(14 * 8 * 6, 14 * 8 * 6),
-            upsample_preds=False,
+            resolution=self.conf["coarse_res"],
+            upsample_preds=True,
             weights=weights,
             dinov2_weights=dinov2_weights,
             device=device,
-            # temp fix issue: https://github.com/Parskatt/RoMa/issues/26
-            amp_dtype=torch.float32,
+            amp_dtype=amp_dtype,
         )
+        self.net.upsample_res = self.conf["upsample_res"]
         logger.info("Load Roma model done.")
 
     def _forward(self, data):
imcui/hloc/matchers/xfeat_dense.py CHANGED
@@ -47,8 +47,10 @@ class XFeatDense(BaseModel):
         # we use results from one batch
         matches = matches[0]
         pred = {
-            "keypoints0": matches[:, :2],
-            "keypoints1": matches[:, 2:],
+            "keypoints0": out0["keypoints"].squeeze(),
+            "keypoints1": out1["keypoints"].squeeze(),
+            "mkeypoints0": matches[:, :2],
+            "mkeypoints1": matches[:, 2:],
             "mconf": torch.ones_like(matches[:, 0]),
         }
         return pred
imcui/hloc/matchers/xfeat_lightglue.py CHANGED
@@ -41,8 +41,10 @@ class XFeatLightGlue(BaseModel):
         mkpts_0 = torch.from_numpy(mkpts_0)  # n x 2
         mkpts_1 = torch.from_numpy(mkpts_1)  # n x 2
         pred = {
-            "keypoints0": mkpts_0,
-            "keypoints1": mkpts_1,
+            "keypoints0": out0["keypoints"].squeeze(),
+            "keypoints1": out1["keypoints"].squeeze(),
+            "mkeypoints0": mkpts_0,
+            "mkeypoints1": mkpts_1,
             "mconf": torch.ones_like(mkpts_0[:, 0]),
         }
         return pred
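Both XFeat wrappers (and the new `dad_roma` matcher) now distinguish detected keypoints from matched ones. A purely illustrative sketch of the prediction dictionary the updated `match_dense.py` expects matchers to return:

```python
import torch

def example_pred(num_detections: int = 500, num_matches: int = 200) -> dict:
    """Illustrative shapes only; real matchers fill these from their outputs."""
    return {
        "keypoints0": torch.rand(num_detections, 2),  # all detections in image 0
        "keypoints1": torch.rand(num_detections, 2),  # all detections in image 1
        "mkeypoints0": torch.rand(num_matches, 2),    # matched keypoints, image 0
        "mkeypoints1": torch.rand(num_matches, 2),    # matched keypoints, image 1
        "mconf": torch.ones(num_matches),             # per-match confidence
    }
```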
imcui/third_party/RoMa/.gitignore ADDED
@@ -0,0 +1,11 @@
+ *.egg-info*
+ *.vscode*
+ *__pycache__*
+ vis*
+ workspace*
+ .venv
+ .DS_Store
+ jobs/*
+ *ignore_me*
+ *.pth
+ wandb*
imcui/third_party/RoMa/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Johan Edstedt
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
imcui/third_party/RoMa/README.md ADDED
@@ -0,0 +1,123 @@
1
+ #
2
+ <p align="center">
3
+ <h1 align="center"> <ins>RoMa</ins> 🏛️:<br> Robust Dense Feature Matching <br> ⭐CVPR 2024⭐</h1>
4
+ <p align="center">
5
+ <a href="https://scholar.google.com/citations?user=Ul-vMR0AAAAJ">Johan Edstedt</a>
6
+ ·
7
+ <a href="https://scholar.google.com/citations?user=HS2WuHkAAAAJ">Qiyu Sun</a>
8
+ ·
9
+ <a href="https://scholar.google.com/citations?user=FUE3Wd0AAAAJ">Georg Bökman</a>
10
+ ·
11
+ <a href="https://scholar.google.com/citations?user=6WRQpCQAAAAJ">Mårten Wadenbäck</a>
12
+ ·
13
+ <a href="https://scholar.google.com/citations?user=lkWfR08AAAAJ">Michael Felsberg</a>
14
+ </p>
15
+ <h2 align="center"><p>
16
+ <a href="https://arxiv.org/abs/2305.15404" align="center">Paper</a> |
17
+ <a href="https://parskatt.github.io/RoMa" align="center">Project Page</a>
18
+ </p></h2>
19
+ <div align="center"></div>
20
+ </p>
21
+ <br/>
22
+ <p align="center">
23
+ <img src="https://github.com/Parskatt/RoMa/assets/22053118/15d8fea7-aa6d-479f-8a93-350d950d006b" alt="example" width=80%>
24
+ <br>
25
+ <em>RoMa is the robust dense feature matcher capable of estimating pixel-dense warps and reliable certainties for almost any image pair.</em>
26
+ </p>
27
+
28
+ ## Setup/Install
29
+ In your python environment (tested on Linux python 3.10), run:
30
+ ```bash
31
+ pip install -e .
32
+ ```
33
+ ## Demo / How to Use
34
+ We provide two demos in the [demos folder](demo).
35
+ Here's the gist of it:
36
+ ```python
37
+ from romatch import roma_outdoor
38
+ roma_model = roma_outdoor(device=device)
39
+ # Match
40
+ warp, certainty = roma_model.match(imA_path, imB_path, device=device)
41
+ # Sample matches for estimation
42
+ matches, certainty = roma_model.sample(warp, certainty)
43
+ # Convert to pixel coordinates (RoMa produces matches in [-1,1]x[-1,1])
44
+ kptsA, kptsB = roma_model.to_pixel_coordinates(matches, H_A, W_A, H_B, W_B)
45
+ # Find a fundamental matrix (or anything else of interest)
46
+ F, mask = cv2.findFundamentalMat(
47
+ kptsA.cpu().numpy(), kptsB.cpu().numpy(), ransacReprojThreshold=0.2, method=cv2.USAC_MAGSAC, confidence=0.999999, maxIters=10000
48
+ )
49
+ ```
50
+
51
+ **New**: You can also match arbitrary keypoints with RoMa. See [match_keypoints](romatch/models/matcher.py) in RegressionMatcher.
52
+
53
+ ## Settings
54
+
55
+ ### Resolution
56
+ By default RoMa uses an initial resolution of (560,560) which is then upsampled to (864,864).
57
+ You can change this at construction (see roma_outdoor kwargs).
58
+ You can also change this later, by changing the roma_model.w_resized, roma_model.h_resized, and roma_model.upsample_res.
59
+
60
+ ### Sampling
61
+ roma_model.sample_thresh controls the thresholding used when sampling matches for estimation. In certain cases a lower or higher threshold may improve results.
62
+
63
+
64
+ ## Reproducing Results
65
+ The experiments in the paper are provided in the [experiments folder](experiments).
66
+
67
+ ### Training
68
+ 1. First follow the instructions provided here: https://github.com/Parskatt/DKM for downloading and preprocessing datasets.
69
+ 2. Run the relevant experiment, e.g.,
70
+ ```bash
71
+ torchrun --nproc_per_node=4 --nnodes=1 --rdzv_backend=c10d experiments/roma_outdoor.py
72
+ ```
73
+ ### Testing
74
+ ```bash
75
+ python experiments/roma_outdoor.py --only_test --benchmark mega-1500
76
+ ```
77
+ ## License
78
+ All our code except DINOv2 is MIT license.
79
+ DINOv2 has an Apache 2 license [DINOv2](https://github.com/facebookresearch/dinov2/blob/main/LICENSE).
80
+
81
+ ## Acknowledgement
82
+ Our codebase builds on the code in [DKM](https://github.com/Parskatt/DKM).
83
+
84
+ ## Tiny RoMa
85
+ If you find that RoMa is too heavy, you might want to try Tiny RoMa which is built on top of XFeat.
86
+ ```python
87
+ from romatch import tiny_roma_v1_outdoor
88
+ tiny_roma_model = tiny_roma_v1_outdoor(device=device)
89
+ ```
90
+ Mega1500:
91
+ | | AUC@5 | AUC@10 | AUC@20 |
92
+ |----------|----------|----------|----------|
93
+ | XFeat | 46.4 | 58.9 | 69.2 |
94
+ | XFeat* | 51.9 | 67.2 | 78.9 |
95
+ | Tiny RoMa v1 | 56.4 | 69.5 | 79.5 |
96
+ | RoMa | - | - | - |
97
+
98
+ Mega-8-Scenes (See DKM):
99
+ | | AUC@5 | AUC@10 | AUC@20 |
100
+ |----------|----------|----------|----------|
101
+ | XFeat | - | - | - |
102
+ | XFeat* | 50.1 | 64.4 | 75.2 |
103
+ | Tiny RoMa v1 | 57.7 | 70.5 | 79.6 |
104
+ | RoMa | - | - | - |
105
+
106
+ IMC22 :'):
107
+ | | mAA@10 |
108
+ |----------|----------|
109
+ | XFeat | 42.1 |
110
+ | XFeat* | - |
111
+ | Tiny RoMa v1 | 42.2 |
112
+ | RoMa | - |
113
+
114
+ ## BibTeX
115
+ If you find our models useful, please consider citing our paper!
116
+ ```
117
+ @article{edstedt2024roma,
118
+ title={{RoMa: Robust Dense Feature Matching}},
119
+ author={Edstedt, Johan and Sun, Qiyu and Bökman, Georg and Wadenbäck, Mårten and Felsberg, Michael},
120
+ journal={IEEE Conference on Computer Vision and Pattern Recognition},
121
+ year={2024}
122
+ }
123
+ ```
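The Settings section of the README above mentions that resolutions can be changed after construction. A short sketch of that, assuming weights are available locally; the concrete values are illustrative, not recommendations:

```python
import torch
from romatch import roma_outdoor

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
roma_model = roma_outdoor(device=device)

# Lower the coarse matching resolution and the upsampling resolution
# for faster (but coarser) matching, as described under "Settings".
roma_model.w_resized, roma_model.h_resized = 448, 448
roma_model.upsample_res = (672, 896)
```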
imcui/third_party/RoMa/data/.gitignore ADDED
@@ -0,0 +1,2 @@
+ *
+ !.gitignore
imcui/third_party/RoMa/requirements.txt ADDED
@@ -0,0 +1,14 @@
+ torch
+ einops
+ torchvision
+ opencv-python
+ kornia
+ albumentations
+ loguru
+ tqdm
+ matplotlib
+ h5py
+ wandb
+ timm
+ poselib
+ # xformers  # Optional, used for memory-efficient attention
imcui/third_party/RoMa/romatch/models/matcher.py CHANGED
@@ -11,7 +11,7 @@ from PIL import Image
11
 
12
  from romatch.utils import get_tuple_transform_ops
13
  from romatch.utils.local_correlation import local_correlation
14
- from romatch.utils.utils import cls_to_flow_refine, get_autocast_params
15
  from romatch.utils.kde import kde
16
 
17
  class ConvRefiner(nn.Module):
@@ -573,12 +573,30 @@ class RegressionMatcher(nn.Module):
573
  kpts_B = torch.stack((2/W_B * kpts_B[...,0] - 1, 2/H_B * kpts_B[...,1] - 1),axis=-1)
574
  return kpts_A, kpts_B
575
 
576
- def match_keypoints(self, x_A, x_B, warp, certainty, return_tuple = True, return_inds = False):
577
- x_A_to_B = F.grid_sample(warp[...,-2:].permute(2,0,1)[None], x_A[None,None], align_corners = False, mode = "bilinear")[0,:,0].mT
578
- cert_A_to_B = F.grid_sample(certainty[None,None,...], x_A[None,None], align_corners = False, mode = "bilinear")[0,0,0]
579
  D = torch.cdist(x_A_to_B, x_B)
580
- inds_A, inds_B = torch.nonzero((D == D.min(dim=-1, keepdim = True).values) * (D == D.min(dim=-2, keepdim = True).values) * (cert_A_to_B[:,None] > self.sample_thresh), as_tuple = True)
581
-
582
  if return_tuple:
583
  if return_inds:
584
  return inds_A, inds_B
@@ -586,25 +604,38 @@ class RegressionMatcher(nn.Module):
586
  return x_A[inds_A], x_B[inds_B]
587
  else:
588
  if return_inds:
589
- return torch.cat((inds_A, inds_B),dim=-1)
590
  else:
591
- return torch.cat((x_A[inds_A], x_B[inds_B]),dim=-1)
592
 
593
  @torch.inference_mode()
594
  def match(
595
  self,
596
- im_A_path,
597
- im_B_path,
598
  *args,
599
  batched=False,
600
- device = None,
601
  ):
602
  if device is None:
603
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
604
- if isinstance(im_A_path, (str, os.PathLike)):
605
- im_A, im_B = Image.open(im_A_path).convert("RGB"), Image.open(im_B_path).convert("RGB")
606
  else:
607
- im_A, im_B = im_A_path, im_B_path
 
608
 
609
  symmetric = self.symmetric
610
  self.train(False)
@@ -616,9 +647,9 @@ class RegressionMatcher(nn.Module):
616
  # Get images in good format
617
  ws = self.w_resized
618
  hs = self.h_resized
619
-
620
  test_transform = get_tuple_transform_ops(
621
- resize=(hs, ws), normalize=True, clahe = False
622
  )
623
  im_A, im_B = test_transform((im_A, im_B))
624
  batch = {"im_A": im_A[None].to(device), "im_B": im_B[None].to(device)}
@@ -633,20 +664,20 @@ class RegressionMatcher(nn.Module):
633
  finest_scale = 1
634
  # Run matcher
635
  if symmetric:
636
- corresps = self.forward_symmetric(batch)
637
  else:
638
- corresps = self.forward(batch, batched = True)
639
 
640
  if self.upsample_preds:
641
  hs, ws = self.upsample_res
642
-
643
  if self.attenuate_cert:
644
  low_res_certainty = F.interpolate(
645
- corresps[16]["certainty"], size=(hs, ws), align_corners=False, mode="bilinear"
646
  )
647
  cert_clamp = 0
648
  factor = 0.5
649
- low_res_certainty = factor*low_res_certainty*(low_res_certainty < cert_clamp)
650
 
651
  if self.upsample_preds:
652
  finest_corresps = corresps[finest_scale]
@@ -654,34 +685,39 @@ class RegressionMatcher(nn.Module):
654
  test_transform = get_tuple_transform_ops(
655
  resize=(hs, ws), normalize=True
656
  )
657
- im_A, im_B = test_transform((Image.open(im_A_path).convert('RGB'), Image.open(im_B_path).convert('RGB')))
658
  im_A, im_B = im_A[None].to(device), im_B[None].to(device)
659
  scale_factor = math.sqrt(self.upsample_res[0] * self.upsample_res[1] / (self.w_resized * self.h_resized))
660
  batch = {"im_A": im_A, "im_B": im_B, "corresps": finest_corresps}
661
  if symmetric:
662
- corresps = self.forward_symmetric(batch, upsample = True, batched=True, scale_factor = scale_factor)
663
  else:
664
- corresps = self.forward(batch, batched = True, upsample=True, scale_factor = scale_factor)
665
-
666
- im_A_to_im_B = corresps[finest_scale]["flow"]
667
  certainty = corresps[finest_scale]["certainty"] - (low_res_certainty if self.attenuate_cert else 0)
668
  if finest_scale != 1:
669
  im_A_to_im_B = F.interpolate(
670
- im_A_to_im_B, size=(hs, ws), align_corners=False, mode="bilinear"
671
  )
672
  certainty = F.interpolate(
673
- certainty, size=(hs, ws), align_corners=False, mode="bilinear"
674
  )
675
  im_A_to_im_B = im_A_to_im_B.permute(
676
  0, 2, 3, 1
677
- )
678
  # Create im_A meshgrid
679
  im_A_coords = torch.meshgrid(
680
  (
681
  torch.linspace(-1 + 1 / hs, 1 - 1 / hs, hs, device=device),
682
  torch.linspace(-1 + 1 / ws, 1 - 1 / ws, ws, device=device),
683
  ),
684
- indexing = 'ij'
685
  )
686
  im_A_coords = torch.stack((im_A_coords[1], im_A_coords[0]))
687
  im_A_coords = im_A_coords[None].expand(b, 2, hs, ws)
@@ -689,14 +725,14 @@ class RegressionMatcher(nn.Module):
689
  im_A_coords = im_A_coords.permute(0, 2, 3, 1)
690
  if (im_A_to_im_B.abs() > 1).any() and True:
691
  wrong = (im_A_to_im_B.abs() > 1).sum(dim=-1) > 0
692
- certainty[wrong[:,None]] = 0
693
  im_A_to_im_B = torch.clamp(im_A_to_im_B, -1, 1)
694
  if symmetric:
695
  A_to_B, B_to_A = im_A_to_im_B.chunk(2)
696
  q_warp = torch.cat((im_A_coords, A_to_B), dim=-1)
697
  im_B_coords = im_A_coords
698
  s_warp = torch.cat((B_to_A, im_B_coords), dim=-1)
699
- warp = torch.cat((q_warp, s_warp),dim=2)
700
  certainty = torch.cat(certainty.chunk(2), dim=3)
701
  else:
702
  warp = torch.cat((im_A_coords, im_A_to_im_B), dim=-1)
 
11
 
12
  from romatch.utils import get_tuple_transform_ops
13
  from romatch.utils.local_correlation import local_correlation
14
+ from romatch.utils.utils import check_rgb, cls_to_flow_refine, get_autocast_params, check_not_i16
15
  from romatch.utils.kde import kde
16
 
17
  class ConvRefiner(nn.Module):
 
573
  kpts_B = torch.stack((2/W_B * kpts_B[...,0] - 1, 2/H_B * kpts_B[...,1] - 1),axis=-1)
574
  return kpts_A, kpts_B
575
 
576
+ def match_keypoints(
577
+ self, x_A, x_B, warp, certainty, return_tuple=True, return_inds=False, max_dist = 0.005, cert_th = 0,
578
+ ):
579
+ x_A_to_B = F.grid_sample(
580
+ warp[..., -2:].permute(2, 0, 1)[None],
581
+ x_A[None, None],
582
+ align_corners=False,
583
+ mode="bilinear",
584
+ )[0, :, 0].mT
585
+ cert_A_to_B = F.grid_sample(
586
+ certainty[None, None, ...],
587
+ x_A[None, None],
588
+ align_corners=False,
589
+ mode="bilinear",
590
+ )[0, 0, 0]
591
  D = torch.cdist(x_A_to_B, x_B)
592
+ inds_A, inds_B = torch.nonzero(
593
+ (D == D.min(dim=-1, keepdim=True).values)
594
+ * (D == D.min(dim=-2, keepdim=True).values)
595
+ * (cert_A_to_B[:, None] > cert_th)
596
+ * (D < max_dist),
597
+ as_tuple=True,
598
+ )
599
+
600
  if return_tuple:
601
  if return_inds:
602
  return inds_A, inds_B
 
604
  return x_A[inds_A], x_B[inds_B]
605
  else:
606
  if return_inds:
607
+ return torch.cat((inds_A, inds_B), dim=-1)
608
  else:
609
+ return torch.cat((x_A[inds_A], x_B[inds_B]), dim=-1)
610
 
611
  @torch.inference_mode()
612
  def match(
613
  self,
614
+ im_A_input,
615
+ im_B_input,
616
  *args,
617
  batched=False,
618
+ device=None,
619
  ):
620
  if device is None:
621
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
622
+
623
+ # Check if inputs are file paths or already loaded images
624
+ if isinstance(im_A_input, (str, os.PathLike)):
625
+ im_A = Image.open(im_A_input)
626
+ check_not_i16(im_A)
627
+ im_A = im_A.convert("RGB")
628
+ else:
629
+ check_rgb(im_A_input)
630
+ im_A = im_A_input
631
+
632
+ if isinstance(im_B_input, (str, os.PathLike)):
633
+ im_B = Image.open(im_B_input)
634
+ check_not_i16(im_B)
635
+ im_B = im_B.convert("RGB")
636
  else:
637
+ check_rgb(im_B_input)
638
+ im_B = im_B_input
639
 
640
  symmetric = self.symmetric
641
  self.train(False)
 
647
  # Get images in good format
648
  ws = self.w_resized
649
  hs = self.h_resized
650
+
651
  test_transform = get_tuple_transform_ops(
652
+ resize=(hs, ws), normalize=True, clahe=False
653
  )
654
  im_A, im_B = test_transform((im_A, im_B))
655
  batch = {"im_A": im_A[None].to(device), "im_B": im_B[None].to(device)}
 
664
  finest_scale = 1
665
  # Run matcher
666
  if symmetric:
667
+ corresps = self.forward_symmetric(batch)
668
  else:
669
+ corresps = self.forward(batch, batched=True)
670
 
671
  if self.upsample_preds:
672
  hs, ws = self.upsample_res
673
+
674
  if self.attenuate_cert:
675
  low_res_certainty = F.interpolate(
676
+ corresps[16]["certainty"], size=(hs, ws), align_corners=False, mode="bilinear"
677
  )
678
  cert_clamp = 0
679
  factor = 0.5
680
+ low_res_certainty = factor * low_res_certainty * (low_res_certainty < cert_clamp)
681
 
682
  if self.upsample_preds:
683
  finest_corresps = corresps[finest_scale]
 
685
  test_transform = get_tuple_transform_ops(
686
  resize=(hs, ws), normalize=True
687
  )
688
+ if isinstance(im_A_input, (str, os.PathLike)):
689
+ im_A, im_B = test_transform(
690
+ (Image.open(im_A_input).convert('RGB'), Image.open(im_B_input).convert('RGB')))
691
+ else:
692
+ im_A, im_B = test_transform((im_A_input, im_B_input))
693
+
694
  im_A, im_B = im_A[None].to(device), im_B[None].to(device)
695
  scale_factor = math.sqrt(self.upsample_res[0] * self.upsample_res[1] / (self.w_resized * self.h_resized))
696
  batch = {"im_A": im_A, "im_B": im_B, "corresps": finest_corresps}
697
  if symmetric:
698
+ corresps = self.forward_symmetric(batch, upsample=True, batched=True, scale_factor=scale_factor)
699
  else:
700
+ corresps = self.forward(batch, batched=True, upsample=True, scale_factor=scale_factor)
701
+
702
+ im_A_to_im_B = corresps[finest_scale]["flow"]
703
  certainty = corresps[finest_scale]["certainty"] - (low_res_certainty if self.attenuate_cert else 0)
704
  if finest_scale != 1:
705
  im_A_to_im_B = F.interpolate(
706
+ im_A_to_im_B, size=(hs, ws), align_corners=False, mode="bilinear"
707
  )
708
  certainty = F.interpolate(
709
+ certainty, size=(hs, ws), align_corners=False, mode="bilinear"
710
  )
711
  im_A_to_im_B = im_A_to_im_B.permute(
712
  0, 2, 3, 1
713
+ )
714
  # Create im_A meshgrid
715
  im_A_coords = torch.meshgrid(
716
  (
717
  torch.linspace(-1 + 1 / hs, 1 - 1 / hs, hs, device=device),
718
  torch.linspace(-1 + 1 / ws, 1 - 1 / ws, ws, device=device),
719
  ),
720
+ indexing='ij'
721
  )
722
  im_A_coords = torch.stack((im_A_coords[1], im_A_coords[0]))
723
  im_A_coords = im_A_coords[None].expand(b, 2, hs, ws)
 
725
  im_A_coords = im_A_coords.permute(0, 2, 3, 1)
726
  if (im_A_to_im_B.abs() > 1).any() and True:
727
  wrong = (im_A_to_im_B.abs() > 1).sum(dim=-1) > 0
728
+ certainty[wrong[:, None]] = 0
729
  im_A_to_im_B = torch.clamp(im_A_to_im_B, -1, 1)
730
  if symmetric:
731
  A_to_B, B_to_A = im_A_to_im_B.chunk(2)
732
  q_warp = torch.cat((im_A_coords, A_to_B), dim=-1)
733
  im_B_coords = im_A_coords
734
  s_warp = torch.cat((B_to_A, im_B_coords), dim=-1)
735
+ warp = torch.cat((q_warp, s_warp), dim=2)
736
  certainty = torch.cat(certainty.chunk(2), dim=3)
737
  else:
738
  warp = torch.cat((im_A_coords, im_A_to_im_B), dim=-1)
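The change above lets `RegressionMatcher.match` accept either file paths or already-loaded PIL images (with `check_not_i16`/`check_rgb` guarding the latter). A brief sketch of both call styles, assuming `roma_model` was built as in the RoMa README:

```python
from PIL import Image

# 1) Path inputs, as before.
warp, certainty = roma_model.match("im_A.jpg", "im_B.jpg")

# 2) Pre-loaded PIL images; they must already be RGB (check_rgb is applied).
im_A = Image.open("im_A.jpg").convert("RGB")
im_B = Image.open("im_B.jpg").convert("RGB")
warp, certainty = roma_model.match(im_A, im_B)
```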
imcui/third_party/RoMa/romatch/models/transformer/layers/attention.py CHANGED
@@ -22,7 +22,7 @@ try:
 
     XFORMERS_AVAILABLE = True
 except ImportError:
-    logger.warning("xFormers not available")
+    # logger.warning("xFormers not available")
     XFORMERS_AVAILABLE = False
 
 
imcui/third_party/RoMa/romatch/models/transformer/layers/block.py CHANGED
@@ -29,7 +29,7 @@ try:
 
     XFORMERS_AVAILABLE = True
 except ImportError:
-    logger.warning("xFormers not available")
+    # logger.warning("xFormers not available")
    XFORMERS_AVAILABLE = False
 
 
imcui/third_party/RoMa/romatch/utils/utils.py CHANGED
@@ -651,4 +651,12 @@ def get_autocast_params(device=None, enabled=False, dtype=None):
         enabled = False
         # mps is not supported
         autocast_device = "cpu"
-    return autocast_device, enabled, out_dtype
+    return autocast_device, enabled, out_dtype
+
+def check_not_i16(im):
+    if im.mode == "I;16":
+        raise NotImplementedError("Can't handle 16 bit images")
+
+def check_rgb(im):
+    if im.mode != "RGB":
+        raise NotImplementedError("Can't handle non-RGB images")
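A small usage sketch of the two helpers added above, guarding image loading the same way the updated `match()` does (the file name is a placeholder):

```python
from PIL import Image
from romatch.utils.utils import check_not_i16, check_rgb

im = Image.open("im_A.jpg")
check_not_i16(im)        # rejects 16-bit "I;16" images
im = im.convert("RGB")
check_rgb(im)            # passes once the image is in RGB mode
```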
imcui/third_party/RoMa/setup.py CHANGED
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
 setup(
     name="romatch",
     packages=find_packages(include=("romatch*",)),
-    version="0.0.1",
+    version="0.0.2",
     author="Johan Edstedt",
     install_requires=open("requirements.txt", "r").read().split("\n"),
 )
imcui/third_party/dad/.gitignore ADDED
@@ -0,0 +1,170 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
161
+
162
+ .vscode*
163
+ *.pth
164
+ wandb
165
+ *.out
166
+ vis/
167
+ workspace/
168
+
169
+ .DS_Store
170
+ *.tar
imcui/third_party/dad/.python-version ADDED
@@ -0,0 +1 @@
+ 3.10
imcui/third_party/dad/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Johan Edstedt
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
imcui/third_party/dad/README.md ADDED
@@ -0,0 +1,130 @@
1
+ <p align="center">
2
+ <h1 align="center"> <ins>DaD:</ins> Distilled Reinforcement Learning for Diverse Keypoint Detection</h1>
3
+ <p align="center">
4
+ <a href="https://scholar.google.com/citations?user=Ul-vMR0AAAAJ">Johan Edstedt</a>
5
+ ·
6
+ <a href="https://scholar.google.com/citations?user=FUE3Wd0AAAAJ">Georg Bökman</a>
7
+ ·
8
+ <a href="https://scholar.google.com/citations?user=6WRQpCQAAAAJ">Mårten Wadenbäck</a>
9
+ ·
10
+ <a href="https://scholar.google.com/citations?user=lkWfR08AAAAJ">Michael Felsberg</a>
11
+ </p>
12
+ <h2 align="center"><p>
13
+ <a href="https://arxiv.org/abs/2503.07347" align="center">Paper</a>
14
+ </p></h2>
15
+ <p align="center">
16
+ <img src="assets/qualitative.jpg" alt="example" width=80%>
17
+ <br>
18
+ <em>DaD's a pretty good keypoint detector, probably the best.</em>
19
+ </p>
20
+ </p>
21
+ <p align="center">
22
+ </p>
23
+
24
+ ## Run
25
+ ```python
26
+ import dad
27
+ from PIL import Image
28
+ img_path = "assets/0015_A.jpg"
29
+ W, H = Image.open(img_path).size  # your image shape
30
+ detector = dad.load_DaD()
31
+ detections = detector.detect_from_path(
32
+ img_path,
33
+ num_keypoints = 512,
34
+ return_dense_probs=True)
35
+ detections["keypoints"] # 1 x 512 x 2, normalized coordinates of keypoints
36
+ detector.to_pixel_coords(detections["keypoints"], H, W)
37
+ detections["keypoint_probs"] # 1 x 512, probs of sampled keypoints
38
+ detections["dense_probs"] # 1 x H x W, probability map
39
+ ```
40
+
41
+ ## Visualize
42
+ ```python
43
+ import dad
44
+ from dad.utils import visualize_keypoints
45
+ detector = dad.load_DaD()
46
+ img_path = "assets/0015_A.jpg"
47
+ vis_path = "vis/0015_A_dad.jpg"
48
+ visualize_keypoints(img_path, vis_path, detector, num_keypoints = 512)
49
+ ```
50
+
51
+ ## Install
52
+ Get uv
53
+ ```bash
54
+ curl -LsSf https://astral.sh/uv/install.sh | sh
55
+ ```
56
+ ### In an existing env
57
+ Assuming you already have some env active:
58
+ ```bash
59
+ uv pip install dad@git+https://github.com/Parskatt/dad.git
60
+ ```
61
+ ### As a project
62
+ For dev, etc:
63
+ ```bash
64
+ git clone [email protected]:Parskatt/dad.git
65
+ uv sync
66
+ source .venv/bin/activate
67
+ ```
68
+
69
+ ## Evaluation
70
+ To evaluate, e.g., DaD on ScanNet1500 with 512 keypoints, run
71
+ ```bash
72
+ python experiments/benchmark.py --detector DaD --num_keypoints 512 --benchmark ScanNet1500
73
+ ```
74
+ Note: leaving out num_keypoints will run the benchmark for all numbers of keypoints, i.e., [512, 1024, 2048, 4096, 8192].
75
+ ### Third party detectors
76
+ We provide wrappers for a fairly large set of previous detectors; to list them, run
77
+ ```bash
78
+ python experiments/benchmark.py --help
79
+ ```
80
+
81
+ ## Training
82
+ To train our final model from the emergent light and dark detector, run
83
+ ```bash
84
+ python experiments/repro_paper_results/distill.py
85
+ ```
86
+ The emergent models come from running
87
+ ```bash
88
+ python experiments/repro_paper_results/rl.py
89
+ ```
90
+ Note, however, that the type of detector that emerges from this training is stochastic, and you may need several runs to get a detector that matches our results.
91
+
92
+ ## How I run experiments
93
+ (Note: You don't have to do this, it's just how I do it.)
94
+ At the start of a new day I typically run
95
+ ```bash
96
+ python new_day.py
97
+ ```
98
+ This creates a new folder in experiments, e.g., `experiments/w11/monday`.
99
+ I then typically just copy the contents of a previous experiment, e.g.,
100
+ ```bash
101
+ cp experiments/repro_paper_results/rl.py experiments/w11/monday/new-cool-hparams.py
102
+ ```
103
+ Change whatever you want to change in `experiments/w11/monday/new-cool-hparams.py`.
104
+
105
+ Then run it with
106
+ ```bash
107
+ python experiments/w11/monday/new-cool-hparams.py
108
+ ```
109
+ This will be tracked in wandb as `w11-monday-new-cool-hparams` in the `DaD` project.
110
+
111
+ If you don't want to track the run, and perhaps want to display some debug output, you can instead run it as follows, which also won't log to wandb
112
+ ```bash
113
+ DEBUG=1 python experiments/w11/monday/new-cool-hparams.py
114
+ ```
115
+ ## Evaluation Results
116
+ TODO
117
+
118
+ ## Licenses
119
+ DaD is MIT licensed.
120
+
121
+ Third party detectors in [dad/detectors/third_party](dad/detectors/third_party) have their own licenses. If you use them, please refer to their respective licenses [here](licenses). (NOTE: there may be more licenses you need to care about than the ones listed. Before using any third party code, make sure you're following its license.)
122
+
123
+
124
+
125
+
126
+ ## BibTeX
127
+
128
+ ```txt
129
+ TODO
130
+ ```
imcui/third_party/dad/dad/__init__.py ADDED
@@ -0,0 +1,17 @@
+ from .logging import logger as logger
+ from .logging import configure_logger as configure_logger
+ import os
+ from .detectors import load_DaD as load_DaD
+ from .detectors import dedode_detector_S as dedode_detector_S
+ from .detectors import dedode_detector_B as dedode_detector_B
+ from .detectors import dedode_detector_L as dedode_detector_L
+ from .detectors import load_DaDDark as load_DaDDark
+ from .detectors import load_DaDLight as load_DaDLight
+ from .types import Detector as Detector
+ from .types import Matcher as Matcher
+ from .types import Benchmark as Benchmark
+
+ configure_logger()
+ DEBUG_MODE = bool(os.environ.get("DEBUG", False))
+ RANK = 0
+ GLOBAL_STEP = 0
imcui/third_party/dad/dad/augs.py ADDED
@@ -0,0 +1,214 @@
1
+ import random
2
+ import warnings
3
+ import numpy as np
4
+ import torch
5
+ from PIL import Image
6
+ from torchvision import transforms
7
+ from torchvision.transforms.functional import InterpolationMode
8
+ import cv2
9
+
10
+
11
+ # From Patch2Pix https://github.com/GrumpyZhou/patch2pix
12
+ def get_depth_tuple_transform_ops(resize=None, normalize=True, unscale=False):
13
+ ops = []
14
+ if resize:
15
+ ops.append(
16
+ TupleResize(resize, mode=InterpolationMode.BILINEAR, antialias=False)
17
+ )
18
+ return TupleCompose(ops)
19
+
20
+
21
+ def get_tuple_transform_ops(resize=None, normalize=True, unscale=False, clahe=False):
22
+ ops = []
23
+ if resize:
24
+ ops.append(TupleResize(resize, antialias=True))
25
+ if clahe:
26
+ ops.append(TupleClahe())
27
+ if normalize:
28
+ ops.append(TupleToTensorScaled())
29
+ ops.append(
30
+ TupleNormalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
31
+ ) # Imagenet mean/std
32
+ else:
33
+ if unscale:
34
+ ops.append(TupleToTensorUnscaled())
35
+ else:
36
+ ops.append(TupleToTensorScaled())
37
+ return TupleCompose(ops)
38
+
39
+
40
+ class Clahe:
41
+ def __init__(self, cliplimit=2, blocksize=8) -> None:
42
+ self.clahe = cv2.createCLAHE(cliplimit, (blocksize, blocksize))
43
+
44
+ def __call__(self, im):
45
+ im_hsv = cv2.cvtColor(np.array(im), cv2.COLOR_RGB2HSV)
46
+ im_v = self.clahe.apply(im_hsv[:, :, 2])
47
+ im_hsv[..., 2] = im_v
48
+ im_clahe = cv2.cvtColor(im_hsv, cv2.COLOR_HSV2RGB)
49
+ return Image.fromarray(im_clahe)
50
+
51
+
52
+ class TupleClahe:
53
+ def __init__(self, cliplimit=8, blocksize=8) -> None:
54
+ self.clahe = Clahe(cliplimit, blocksize)
55
+
56
+ def __call__(self, ims):
57
+ return [self.clahe(im) for im in ims]
58
+
59
+
60
+ class ToTensorScaled(object):
61
+ """Convert a RGB PIL Image to a CHW ordered Tensor, scale the range to [0, 1]"""
62
+
63
+ def __call__(self, im):
64
+ if not isinstance(im, torch.Tensor):
65
+ im = np.array(im, dtype=np.float32).transpose((2, 0, 1))
66
+ im /= 255.0
67
+ return torch.from_numpy(im)
68
+ else:
69
+ return im
70
+
71
+ def __repr__(self):
72
+ return "ToTensorScaled(./255)"
73
+
74
+
75
+ class TupleToTensorScaled(object):
76
+ def __init__(self):
77
+ self.to_tensor = ToTensorScaled()
78
+
79
+ def __call__(self, im_tuple):
80
+ return [self.to_tensor(im) for im in im_tuple]
81
+
82
+ def __repr__(self):
83
+ return "TupleToTensorScaled(./255)"
84
+
85
+
86
+ class ToTensorUnscaled(object):
87
+ """Convert a RGB PIL Image to a CHW ordered Tensor"""
88
+
89
+ def __call__(self, im):
90
+ return torch.from_numpy(np.array(im, dtype=np.float32).transpose((2, 0, 1)))
91
+
92
+ def __repr__(self):
93
+ return "ToTensorUnscaled()"
94
+
95
+
96
+ class TupleToTensorUnscaled(object):
97
+ """Convert a RGB PIL Image to a CHW ordered Tensor"""
98
+
99
+ def __init__(self):
100
+ self.to_tensor = ToTensorUnscaled()
101
+
102
+ def __call__(self, im_tuple):
103
+ return [self.to_tensor(im) for im in im_tuple]
104
+
105
+ def __repr__(self):
106
+ return "TupleToTensorUnscaled()"
107
+
108
+
109
+ class TupleResize(object):
110
+ def __init__(self, size, mode=InterpolationMode.BICUBIC, antialias=None):
111
+ self.size = size
112
+ self.resize = transforms.Resize(size, mode, antialias=antialias)
113
+
114
+ def __call__(self, im_tuple):
115
+ return [self.resize(im) for im in im_tuple]
116
+
117
+ def __repr__(self):
118
+ return "TupleResize(size={})".format(self.size)
119
+
120
+
121
+ class Normalize:
122
+ def __call__(self, im):
123
+ mean = im.mean(dim=(1, 2), keepdims=True)
124
+ std = im.std(dim=(1, 2), keepdims=True)
125
+ return (im - mean) / std
126
+
127
+
128
+ class TupleNormalize(object):
129
+ def __init__(self, mean, std):
130
+ self.mean = mean
131
+ self.std = std
132
+ self.normalize = transforms.Normalize(mean=mean, std=std)
133
+
134
+ def __call__(self, im_tuple):
135
+ c, h, w = im_tuple[0].shape
136
+ if c > 3:
137
+ warnings.warn(f"Number of channels {c=} > 3, assuming first 3 are rgb")
138
+ return [self.normalize(im[:3]) for im in im_tuple]
139
+
140
+ def __repr__(self):
141
+ return "TupleNormalize(mean={}, std={})".format(self.mean, self.std)
142
+
143
+
144
+ class TupleCompose(object):
145
+ def __init__(self, transforms):
146
+ self.transforms = transforms
147
+
148
+ def __call__(self, im_tuple):
149
+ for t in self.transforms:
150
+ im_tuple = t(im_tuple)
151
+ return im_tuple
152
+
153
+ def __repr__(self):
154
+ format_string = self.__class__.__name__ + "("
155
+ for t in self.transforms:
156
+ format_string += "\n"
157
+ format_string += " {0}".format(t)
158
+ format_string += "\n)"
159
+ return format_string
160
+
161
+
162
+ def pad_kps(kps: torch.Tensor, pad_num_kps: int, value: int = -1):
163
+ assert len(kps.shape) == 2
164
+ N = len(kps)
165
+ padded_kps = value * torch.ones((pad_num_kps, 2)).to(kps)
166
+ padded_kps[:N] = kps
167
+ return padded_kps
168
+
169
+
170
+ def crop(img: Image.Image, x: int, y: int, crop_size: int):
171
+ width, height = img.size
172
+ if width < crop_size or height < crop_size:
173
+ raise ValueError(f"Image dimensions must be at least {crop_size}x{crop_size}")
174
+ cropped_img = img.crop((x, y, x + crop_size, y + crop_size))
175
+ return cropped_img
176
+
177
+
178
+ def random_crop(img: Image.Image, crop_size: int):
179
+ width, height = img.size
180
+
181
+ if width < crop_size or height < crop_size:
182
+ raise ValueError(f"Image dimensions must be at least {crop_size}x{crop_size}")
183
+
184
+ max_x = width - crop_size
185
+ max_y = height - crop_size
186
+
187
+ x = random.randint(0, max_x)
188
+ y = random.randint(0, max_y)
189
+
190
+ cropped_img = img.crop((x, y, x + crop_size, y + crop_size))
191
+ return cropped_img, (x, y)
192
+
193
+
194
+ def luminance_negation(pil_img):
195
+ # Convert PIL RGB to numpy array
196
+ rgb_array = np.array(pil_img)
197
+
198
+ # Convert RGB to BGR (OpenCV format)
199
+ bgr = cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)
200
+
201
+ # Convert BGR to LAB
202
+ lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
203
+
204
+ # Negate L channel
205
+ lab[:, :, 0] = 255 - lab[:, :, 0]
206
+
207
+ # Convert back to BGR
208
+ bgr_result = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
209
+
210
+ # Convert BGR back to RGB
211
+ rgb_result = cv2.cvtColor(bgr_result, cv2.COLOR_BGR2RGB)
212
+
213
+ # Convert numpy array back to PIL Image
214
+ return Image.fromarray(rgb_result)
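A minimal sketch of the transform pipeline defined in this file: resize an image pair, convert to [0, 1] tensors, and apply ImageNet normalization (the `normalize=True` branch of `get_tuple_transform_ops`). The image path reuses the asset referenced in the DaD README; the target size is illustrative.

```python
from PIL import Image
from dad.augs import get_tuple_transform_ops

ops = get_tuple_transform_ops(resize=(448, 448), normalize=True)
im_A = Image.open("assets/0015_A.jpg").convert("RGB")
im_B = Image.open("assets/0015_A.jpg").convert("RGB")
t_A, t_B = ops((im_A, im_B))
print(t_A.shape)  # torch.Size([3, 448, 448])
```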
imcui/third_party/dad/dad/benchmarks/__init__.py ADDED
@@ -0,0 +1,21 @@
+ # from .benchmark import Benchmark as Benchmark
+ from .num_inliers import NumInliersBenchmark as NumInliersBenchmark
+ from .megadepth import Mega1500 as Mega1500
+ from .megadepth import Mega1500_F as Mega1500_F
+ from .megadepth import MegaIMCPT as MegaIMCPT
+ from .megadepth import MegaIMCPT_F as MegaIMCPT_F
+ from .scannet import ScanNet1500 as ScanNet1500
+ from .scannet import ScanNet1500_F as ScanNet1500_F
+ from .hpatches import HPatchesViewpoint as HPatchesViewpoint
+ from .hpatches import HPatchesIllum as HPatchesIllum
+
+ all_benchmarks = [
+     Mega1500.__name__,
+     Mega1500_F.__name__,
+     MegaIMCPT.__name__,
+     MegaIMCPT_F.__name__,
+     ScanNet1500.__name__,
+     ScanNet1500_F.__name__,
+     HPatchesViewpoint.__name__,
+     HPatchesIllum.__name__,
+ ]
imcui/third_party/dad/dad/benchmarks/hpatches.py ADDED
@@ -0,0 +1,117 @@
1
+ import os
2
+ from typing import Optional
3
+
4
+ import numpy as np
5
+ import poselib
6
+ from PIL import Image
7
+ from tqdm import tqdm
8
+
9
+ from dad.types import Detector, Matcher, Benchmark
10
+
11
+
12
+ class HPatchesBenchmark(Benchmark):
13
+ def __init__(
14
+ self,
15
+ data_root="data/hpatches",
16
+ sample_every=1,
17
+ num_ransac_runs=5,
18
+ num_keypoints: Optional[list[int]] = None,
19
+ ) -> None:
20
+ super().__init__(
21
+ data_root=data_root,
22
+ num_keypoints=num_keypoints,
23
+ sample_every=sample_every,
24
+ num_ransac_runs=num_ransac_runs,
25
+ thresholds=[3, 5, 10],
26
+ )
27
+ seqs_dir = "hpatches-sequences-release"
28
+ self.seqs_path = os.path.join(self.data_root, seqs_dir)
29
+ self.seq_names = sorted(os.listdir(self.seqs_path))
30
+ self.topleft = 0.0
31
+ self._post_init()
32
+ self.skip_seqs: str
33
+ self.scene_names: list[str]
34
+
35
+ def _post_init(self):
36
+ # set self.skip_seqs and self.scene_names here
37
+ raise NotImplementedError()
38
+
39
+ def benchmark(self, detector: Detector, matcher: Matcher):
40
+ homog_dists = []
41
+ for seq_idx, seq_name in enumerate(tqdm(self.seq_names[:: self.sample_every])):
42
+ if self.skip_seqs in seq_name:
43
+ # skip the sequences excluded by this split (illumination or viewpoint)
44
+ continue
45
+ im_A_path = os.path.join(self.seqs_path, seq_name, "1.ppm")
46
+ im_A = Image.open(im_A_path)
47
+ w1, h1 = im_A.size
48
+ for im_idx in list(range(2, 7)):
49
+ im_B_path = os.path.join(self.seqs_path, seq_name, f"{im_idx}.ppm")
50
+ H = np.loadtxt(
51
+ os.path.join(self.seqs_path, seq_name, "H_1_" + str(im_idx))
52
+ )
53
+ warp, certainty = matcher.match(im_A_path, im_B_path)
54
+ for num_kps in self.num_keypoints:
55
+ keypoints_A = detector.detect_from_path(
56
+ im_A_path,
57
+ num_keypoints=num_kps,
58
+ )["keypoints"][0]
59
+ keypoints_B = detector.detect_from_path(
60
+ im_B_path,
61
+ num_keypoints=num_kps,
62
+ )["keypoints"][0]
63
+ matches = matcher.match_keypoints(
64
+ keypoints_A,
65
+ keypoints_B,
66
+ warp,
67
+ certainty,
68
+ return_tuple=False,
69
+ )
70
+ im_A = Image.open(im_A_path)
71
+ w1, h1 = im_A.size
72
+ im_B = Image.open(im_B_path)
73
+ w2, h2 = im_B.size
74
+ kpts1, kpts2 = matcher.to_pixel_coordinates(matches, h1, w1, h2, w2)
75
+ offset = detector.topleft - self.topleft
76
+ kpts1, kpts2 = kpts1 - offset, kpts2 - offset
77
+ for _ in range(self.num_ransac_runs):
78
+ shuffling = np.random.permutation(np.arange(len(kpts1)))
79
+ kpts1 = kpts1[shuffling]
80
+ kpts2 = kpts2[shuffling]
81
+ threshold = 2.0
82
+ H_pred, res = poselib.estimate_homography(
83
+ kpts1.cpu().numpy(),
84
+ kpts2.cpu().numpy(),
85
+ ransac_opt={
86
+ "max_reproj_error": threshold,
87
+ },
88
+ )
89
+ corners = np.array(
90
+ [
91
+ [0, 0, 1],
92
+ [0, h1 - 1, 1],
93
+ [w1 - 1, 0, 1],
94
+ [w1 - 1, h1 - 1, 1],
95
+ ]
96
+ )
97
+ real_warped_corners = np.dot(corners, np.transpose(H))
98
+ real_warped_corners = (
99
+ real_warped_corners[:, :2] / real_warped_corners[:, 2:]
100
+ )
101
+ warped_corners = np.dot(corners, np.transpose(H_pred))
102
+ warped_corners = warped_corners[:, :2] / warped_corners[:, 2:]
103
+ mean_dist = np.mean(
104
+ np.linalg.norm(real_warped_corners - warped_corners, axis=1)
105
+ ) / (min(w2, h2) / 480.0)
106
+ homog_dists.append(mean_dist)
107
+ return self.compute_auc(np.array(homog_dists))
108
+
109
+
110
+ class HPatchesViewpoint(HPatchesBenchmark):
111
+ def _post_init(self):
112
+ self.skip_seqs = "i_"
113
+
114
+
115
+ class HPatchesIllum(HPatchesBenchmark):
116
+ def _post_init(self):
117
+ self.skip_seqs = "v_"
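
The homography error computed above reduces to a mean corner-transfer distance, rescaled so that images with a 480-pixel short side are measured in raw pixels. A standalone sketch of just that metric, with `H_gt` and `H_pred` as 3x3 numpy arrays:

```python
import numpy as np


def corner_transfer_error(H_gt, H_pred, w1, h1, w2, h2):
    # project the four corners of image A with both homographies and compare,
    # scaling by min(w2, h2) / 480 as in the benchmark above
    corners = np.array(
        [[0, 0, 1], [0, h1 - 1, 1], [w1 - 1, 0, 1], [w1 - 1, h1 - 1, 1]]
    )
    gt = corners @ H_gt.T
    gt = gt[:, :2] / gt[:, 2:]
    pred = corners @ H_pred.T
    pred = pred[:, :2] / pred[:, 2:]
    return np.mean(np.linalg.norm(gt - pred, axis=1)) / (min(w2, h2) / 480.0)
```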
imcui/third_party/dad/dad/benchmarks/megadepth.py ADDED
@@ -0,0 +1,219 @@
1
+ from typing import Literal, Optional
2
+
3
+ import numpy as np
4
+ from PIL import Image
5
+ from tqdm import tqdm
6
+
7
+ from dad.types import Detector, Matcher, Benchmark
8
+ from dad.utils import (
9
+ compute_pose_error,
10
+ compute_relative_pose,
11
+ estimate_pose_essential,
12
+ estimate_pose_fundamental,
13
+ )
14
+
15
+
16
+ class MegaDepthPoseEstimationBenchmark(Benchmark):
17
+ def __init__(
18
+ self,
19
+ data_root="data/megadepth",
20
+ sample_every=1,
21
+ num_ransac_runs=5,
22
+ num_keypoints: Optional[list[int]] = None,
23
+ ) -> None:
24
+ super().__init__(
25
+ data_root=data_root,
26
+ num_keypoints=num_keypoints,
27
+ sample_every=sample_every,
28
+ num_ransac_runs=num_ransac_runs,
29
+ thresholds=[5, 10, 20],
30
+ )
31
+ self.sample_every = sample_every
32
+ self.topleft = 0.5
33
+ self._post_init()
34
+ self.model: Literal["fundamental", "essential"]
35
+ self.scene_names: list[str]
36
+ self.benchmark_name: str
37
+
38
+ def _post_init(self):
39
+ raise NotImplementedError(
40
+ "Add scene names and benchmark name in derived class _post_init"
41
+ )
42
+
43
+ def benchmark(
44
+ self,
45
+ detector: Detector,
46
+ matcher: Matcher,
47
+ ):
48
+ self.scenes = [
49
+ np.load(f"{self.data_root}/{scene}", allow_pickle=True)
50
+ for scene in self.scene_names
51
+ ]
52
+
53
+ data_root = self.data_root
54
+ tot_e_pose = []
55
+ n_matches = []
56
+ for scene_ind in range(len(self.scenes)):
57
+ scene = self.scenes[scene_ind]
58
+ pairs = scene["pair_infos"]
59
+ intrinsics = scene["intrinsics"]
60
+ poses = scene["poses"]
61
+ im_paths = scene["image_paths"]
62
+ pair_inds = range(len(pairs))
63
+ for pairind in (
64
+ pbar := tqdm(
65
+ pair_inds[:: self.sample_every],
66
+ desc="Current AUC: ?",
67
+ mininterval=10,
68
+ )
69
+ ):
70
+ idx1, idx2 = pairs[pairind][0]
71
+ K1 = intrinsics[idx1].copy()
72
+ T1 = poses[idx1].copy()
73
+ R1, t1 = T1[:3, :3], T1[:3, 3]
74
+ K2 = intrinsics[idx2].copy()
75
+ T2 = poses[idx2].copy()
76
+ R2, t2 = T2[:3, :3], T2[:3, 3]
77
+ R, t = compute_relative_pose(R1, t1, R2, t2)
78
+ im_A_path = f"{data_root}/{im_paths[idx1]}"
79
+ im_B_path = f"{data_root}/{im_paths[idx2]}"
80
+
81
+ warp, certainty = matcher.match(im_A_path, im_B_path)
82
+ for num_kps in self.num_keypoints:
83
+ keypoints_A = detector.detect_from_path(
84
+ im_A_path,
85
+ num_keypoints=num_kps,
86
+ )["keypoints"][0]
87
+ keypoints_B = detector.detect_from_path(
88
+ im_B_path,
89
+ num_keypoints=num_kps,
90
+ )["keypoints"][0]
91
+ matches = matcher.match_keypoints(
92
+ keypoints_A,
93
+ keypoints_B,
94
+ warp,
95
+ certainty,
96
+ return_tuple=False,
97
+ )
98
+ n_matches.append(matches.shape[0])
99
+ im_A = Image.open(im_A_path)
100
+ w1, h1 = im_A.size
101
+ im_B = Image.open(im_B_path)
102
+ w2, h2 = im_B.size
103
+ kpts1, kpts2 = matcher.to_pixel_coordinates(matches, h1, w1, h2, w2)
104
+ offset = detector.topleft - self.topleft
105
+ kpts1, kpts2 = kpts1 - offset, kpts2 - offset
106
+
107
+ for _ in range(self.num_ransac_runs):
108
+ shuffling = np.random.permutation(np.arange(len(kpts1)))
109
+ kpts1 = kpts1[shuffling]
110
+ kpts2 = kpts2[shuffling]
111
+ threshold = 2.0
112
+ if self.model == "essential":
113
+ R_est, t_est = estimate_pose_essential(
114
+ kpts1.cpu().numpy(),
115
+ kpts2.cpu().numpy(),
116
+ w1,
117
+ h1,
118
+ K1,
119
+ w2,
120
+ h2,
121
+ K2,
122
+ threshold,
123
+ )
124
+ elif self.model == "fundamental":
125
+ R_est, t_est = estimate_pose_fundamental(
126
+ kpts1.cpu().numpy(),
127
+ kpts2.cpu().numpy(),
128
+ w1,
129
+ h1,
130
+ K1,
131
+ w2,
132
+ h2,
133
+ K2,
134
+ threshold,
135
+ )
136
+ T1_to_2_est = np.concatenate((R_est, t_est[:, None]), axis=-1)
137
+ e_t, e_R = compute_pose_error(T1_to_2_est, R, t)
138
+ e_pose = max(e_t, e_R)
139
+ tot_e_pose.append(e_pose)
140
+ pbar.set_description(
141
+ f"Current AUCS: {self.compute_auc(np.array(tot_e_pose))}"
142
+ )
143
+ n_matches = np.array(n_matches)
144
+ print(n_matches.mean(), np.median(n_matches), np.std(n_matches))
145
+ return self.compute_auc(np.array(tot_e_pose))
146
+
147
+
148
+ class Mega1500(MegaDepthPoseEstimationBenchmark):
149
+ def _post_init(self):
150
+ self.scene_names = [
151
+ "0015_0.1_0.3.npz",
152
+ "0015_0.3_0.5.npz",
153
+ "0022_0.1_0.3.npz",
154
+ "0022_0.3_0.5.npz",
155
+ "0022_0.5_0.7.npz",
156
+ ]
157
+ self.benchmark_name = "Mega1500"
158
+ self.model = "essential"
159
+
160
+
161
+ class Mega1500_F(MegaDepthPoseEstimationBenchmark):
162
+ def _post_init(self):
163
+ self.scene_names = [
164
+ "0015_0.1_0.3.npz",
165
+ "0015_0.3_0.5.npz",
166
+ "0022_0.1_0.3.npz",
167
+ "0022_0.3_0.5.npz",
168
+ "0022_0.5_0.7.npz",
169
+ ]
170
+ self.benchmark_name = "Mega1500_F"
171
+ self.model = "fundamental"
172
+
173
+
174
+ class MegaIMCPT(MegaDepthPoseEstimationBenchmark):
175
+ def _post_init(self):
176
+ self.scene_names = [
177
+ "mega_8_scenes_0008_0.1_0.3.npz",
178
+ "mega_8_scenes_0008_0.3_0.5.npz",
179
+ "mega_8_scenes_0019_0.1_0.3.npz",
180
+ "mega_8_scenes_0019_0.3_0.5.npz",
181
+ "mega_8_scenes_0021_0.1_0.3.npz",
182
+ "mega_8_scenes_0021_0.3_0.5.npz",
183
+ "mega_8_scenes_0024_0.1_0.3.npz",
184
+ "mega_8_scenes_0024_0.3_0.5.npz",
185
+ "mega_8_scenes_0025_0.1_0.3.npz",
186
+ "mega_8_scenes_0025_0.3_0.5.npz",
187
+ "mega_8_scenes_0032_0.1_0.3.npz",
188
+ "mega_8_scenes_0032_0.3_0.5.npz",
189
+ "mega_8_scenes_0063_0.1_0.3.npz",
190
+ "mega_8_scenes_0063_0.3_0.5.npz",
191
+ "mega_8_scenes_1589_0.1_0.3.npz",
192
+ "mega_8_scenes_1589_0.3_0.5.npz",
193
+ ]
194
+ self.benchmark_name = "MegaIMCPT"
195
+ self.model = "essential"
196
+
197
+
198
+ class MegaIMCPT_F(MegaDepthPoseEstimationBenchmark):
199
+ def _post_init(self):
200
+ self.scene_names = [
201
+ "mega_8_scenes_0008_0.1_0.3.npz",
202
+ "mega_8_scenes_0008_0.3_0.5.npz",
203
+ "mega_8_scenes_0019_0.1_0.3.npz",
204
+ "mega_8_scenes_0019_0.3_0.5.npz",
205
+ "mega_8_scenes_0021_0.1_0.3.npz",
206
+ "mega_8_scenes_0021_0.3_0.5.npz",
207
+ "mega_8_scenes_0024_0.1_0.3.npz",
208
+ "mega_8_scenes_0024_0.3_0.5.npz",
209
+ "mega_8_scenes_0025_0.1_0.3.npz",
210
+ "mega_8_scenes_0025_0.3_0.5.npz",
211
+ "mega_8_scenes_0032_0.1_0.3.npz",
212
+ "mega_8_scenes_0032_0.3_0.5.npz",
213
+ "mega_8_scenes_0063_0.1_0.3.npz",
214
+ "mega_8_scenes_0063_0.3_0.5.npz",
215
+ "mega_8_scenes_1589_0.1_0.3.npz",
216
+ "mega_8_scenes_1589_0.3_0.5.npz",
217
+ ]
218
+ self.benchmark_name = "MegaIMCPT_F"
219
+ self.model = "fundamental"
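
A hedged sketch of running one of these pose benchmarks end to end. The benchmark and detector APIs are taken from the code above; the matcher is left to the caller, since any `dad.types.Matcher` exposing `match`, `match_keypoints` and `to_pixel_coordinates` (for example a RoMa-based one, as in this commit) will do, and the MegaDepth `.npz` files are assumed to live under `data/megadepth`:

```python
# Sketch only: benchmark/detector API from the classes above, matcher supplied by caller.
from dad.benchmarks import Mega1500
from dad.detectors import load_detector_by_name
from dad.types import Matcher


def run_mega1500(matcher: Matcher, data_root: str = "data/megadepth"):
    detector = load_detector_by_name("DaD", resize=1024)
    bench = Mega1500(data_root=data_root, num_keypoints=[512, 2048])
    # returns AUC at the 5/10/20 degree pose-error thresholds
    return bench.benchmark(detector, matcher)
```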
imcui/third_party/dad/dad/benchmarks/num_inliers.py ADDED
@@ -0,0 +1,106 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+ from tqdm import tqdm
4
+
5
+ from dad.types import Detector
6
+ from dad.utils import get_gt_warp, to_best_device
7
+
8
+
9
+ class NumInliersBenchmark:
10
+ def __init__(
11
+ self,
12
+ dataset,
13
+ num_samples=1000,
14
+ batch_size=8,
15
+ num_keypoints=512,
16
+ **kwargs,
17
+ ) -> None:
18
+ sampler = torch.utils.data.WeightedRandomSampler(
19
+ torch.ones(len(dataset)), replacement=False, num_samples=num_samples
20
+ )
21
+ dataloader = torch.utils.data.DataLoader(
22
+ dataset, batch_size=batch_size, num_workers=batch_size, sampler=sampler
23
+ )
24
+ self.dataloader = dataloader
25
+ self.tracked_metrics = {}
26
+ self.batch_size = batch_size
27
+ self.N = len(dataloader)
28
+ self.num_keypoints = num_keypoints
29
+
30
+ def compute_batch_metrics(self, outputs, batch):
31
+ kpts_A, kpts_B = outputs["keypoints_A"], outputs["keypoints_B"]
32
+ B, K, H, W = batch["im_A"].shape
33
+ gt_warp_A_to_B, valid_mask_A_to_B = get_gt_warp(
34
+ batch["im_A_depth"],
35
+ batch["im_B_depth"],
36
+ batch["T_1to2"],
37
+ batch["K1"],
38
+ batch["K2"],
39
+ H=H,
40
+ W=W,
41
+ )
42
+ kpts_A_to_B = F.grid_sample(
43
+ gt_warp_A_to_B[..., 2:].float().permute(0, 3, 1, 2),
44
+ kpts_A[..., None, :],
45
+ align_corners=False,
46
+ mode="bilinear",
47
+ )[..., 0].mT
48
+ legit_A_to_B = F.grid_sample(
49
+ valid_mask_A_to_B.reshape(B, 1, H, W),
50
+ kpts_A[..., None, :],
51
+ align_corners=False,
52
+ mode="bilinear",
53
+ )[..., 0, :, 0]
54
+ dists = (
55
+ torch.cdist(kpts_A_to_B, kpts_B).min(dim=-1).values[legit_A_to_B > 0.0]
56
+ ).float()
57
+ if legit_A_to_B.sum() == 0:
58
+ return
59
+ percent_inliers_at_1 = (dists < 0.02).float().mean()
60
+ percent_inliers_at_05 = (dists < 0.01).float().mean()
61
+ percent_inliers_at_025 = (dists < 0.005).float().mean()
62
+ percent_inliers_at_01 = (dists < 0.002).float().mean()
63
+ percent_inliers_at_005 = (dists < 0.001).float().mean()
64
+
65
+ self.tracked_metrics["percent_inliers_at_1"] = (
66
+ self.tracked_metrics.get("percent_inliers_at_1", 0)
67
+ + 1 / self.N * percent_inliers_at_1
68
+ )
69
+ self.tracked_metrics["percent_inliers_at_05"] = (
70
+ self.tracked_metrics.get("percent_inliers_at_05", 0)
71
+ + 1 / self.N * percent_inliers_at_05
72
+ )
73
+ self.tracked_metrics["percent_inliers_at_025"] = (
74
+ self.tracked_metrics.get("percent_inliers_at_025", 0)
75
+ + 1 / self.N * percent_inliers_at_025
76
+ )
77
+ self.tracked_metrics["percent_inliers_at_01"] = (
78
+ self.tracked_metrics.get("percent_inliers_at_01", 0)
79
+ + 1 / self.N * percent_inliers_at_01
80
+ )
81
+ self.tracked_metrics["percent_inliers_at_005"] = (
82
+ self.tracked_metrics.get("percent_inliers_at_005", 0)
83
+ + 1 / self.N * percent_inliers_at_005
84
+ )
85
+
86
+ def benchmark(self, detector: Detector):
87
+ self.tracked_metrics = {}
88
+
89
+ print("Evaluating percent inliers...")
90
+ for idx, batch in enumerate(tqdm(self.dataloader, mininterval=10.0)):
91
+ batch = to_best_device(batch)
92
+ outputs = detector.detect(batch, num_keypoints=self.num_keypoints)
93
+ if isinstance(outputs["keypoints"], (tuple, list)):
+ # detector returned per-image keypoint lists: split in half and stack
+ half = len(outputs["keypoints"]) // 2
+ keypoints_A = torch.stack(outputs["keypoints"][:half])
+ keypoints_B = torch.stack(outputs["keypoints"][half:])
+ else:
+ keypoints_A, keypoints_B = outputs["keypoints"].chunk(2)
99
+ outputs = {"keypoints_A": keypoints_A, "keypoints_B": keypoints_B}
100
+ self.compute_batch_metrics(outputs, batch)
101
+ [
102
+ print(name, metric.item() * self.N / (idx + 1))
103
+ for name, metric in self.tracked_metrics.items()
104
+ if "percent" in name
105
+ ]
106
+ return self.tracked_metrics
imcui/third_party/dad/dad/benchmarks/scannet.py ADDED
@@ -0,0 +1,163 @@
1
+ import os.path as osp
2
+ from typing import Literal, Optional
3
+
4
+ import numpy as np
5
+ import torch
6
+ from PIL import Image
7
+ from tqdm import tqdm
8
+
9
+ from dad.types import Detector, Matcher, Benchmark
10
+ from dad.utils import (
11
+ compute_pose_error,
12
+ estimate_pose_essential,
13
+ estimate_pose_fundamental,
14
+ )
15
+
16
+
17
+ class ScanNetBenchmark(Benchmark):
18
+ def __init__(
19
+ self,
20
+ sample_every: int = 1,
21
+ num_ransac_runs=5,
22
+ data_root: str = "data/scannet",
23
+ num_keypoints: Optional[list[int]] = None,
24
+ ) -> None:
25
+ super().__init__(
26
+ data_root=data_root,
27
+ num_keypoints=num_keypoints,
28
+ sample_every=sample_every,
29
+ num_ransac_runs=num_ransac_runs,
30
+ thresholds=[5, 10, 20],
31
+ )
32
+ self.sample_every = sample_every
33
+ self.topleft = 0.0
34
+ self._post_init()
35
+ self.model: Literal["fundamental", "essential"]
36
+ self.test_pairs: str
37
+ self.benchmark_name: str
38
+
39
+ def _post_init(self):
40
+ # set self.test_pairs, self.benchmark_name and self.model here
41
+ raise NotImplementedError("")
42
+
43
+ @torch.no_grad()
44
+ def benchmark(self, matcher: Matcher, detector: Detector):
45
+ tmp = np.load(self.test_pairs)
46
+ pairs, rel_pose = tmp["name"], tmp["rel_pose"]
47
+ tot_e_pose = []
48
+ # pair_inds = np.random.choice(range(len(pairs)), size=len(pairs), replace=False)
49
+ for pairind in tqdm(
50
+ range(0, len(pairs), self.sample_every), smoothing=0.9, mininterval=10
51
+ ):
52
+ scene = pairs[pairind]
53
+ scene_name = f"scene0{scene[0]}_00"
54
+ im_A_path = osp.join(
55
+ self.data_root,
56
+ "scans_test",
57
+ scene_name,
58
+ "color",
59
+ f"{scene[2]}.jpg",
60
+ )
61
+ im_A = Image.open(im_A_path)
62
+ im_B_path = osp.join(
63
+ self.data_root,
64
+ "scans_test",
65
+ scene_name,
66
+ "color",
67
+ f"{scene[3]}.jpg",
68
+ )
69
+ im_B = Image.open(im_B_path)
70
+ T_gt = rel_pose[pairind].reshape(3, 4)
71
+ R, t = T_gt[:3, :3], T_gt[:3, 3]
72
+ K = np.stack(
73
+ [
74
+ np.array([float(i) for i in r.split()])
75
+ for r in open(
76
+ osp.join(
77
+ self.data_root,
78
+ "scans_test",
79
+ scene_name,
80
+ "intrinsic",
81
+ "intrinsic_color.txt",
82
+ ),
83
+ "r",
84
+ )
85
+ .read()
86
+ .split("\n")
87
+ if r
88
+ ]
89
+ )
90
+ w1, h1 = im_A.size
91
+ w2, h2 = im_B.size
92
+ K1 = K.copy()[:3, :3]
93
+ K2 = K.copy()[:3, :3]
94
+ warp, certainty = matcher.match(im_A_path, im_B_path)
95
+ for num_kps in self.num_keypoints:
96
+ keypoints_A = detector.detect_from_path(
97
+ im_A_path,
98
+ num_keypoints=num_kps,
99
+ )["keypoints"][0]
100
+ keypoints_B = detector.detect_from_path(
101
+ im_B_path,
102
+ num_keypoints=num_kps,
103
+ )["keypoints"][0]
104
+ matches = matcher.match_keypoints(
105
+ keypoints_A,
106
+ keypoints_B,
107
+ warp,
108
+ certainty,
109
+ return_tuple=False,
110
+ )
111
+ kpts1, kpts2 = matcher.to_pixel_coordinates(matches, h1, w1, h2, w2)
112
+
113
+ offset = detector.topleft - self.topleft
114
+ kpts1, kpts2 = kpts1 - offset, kpts2 - offset
115
+
116
+ for _ in range(self.num_ransac_runs):
117
+ shuffling = np.random.permutation(np.arange(len(kpts1)))
118
+ kpts1 = kpts1[shuffling]
119
+ kpts2 = kpts2[shuffling]
120
+ threshold = 2.0
121
+ if self.model == "essential":
122
+ R_est, t_est = estimate_pose_essential(
123
+ kpts1.cpu().numpy(),
124
+ kpts2.cpu().numpy(),
125
+ w1,
126
+ h1,
127
+ K1,
128
+ w2,
129
+ h2,
130
+ K2,
131
+ threshold,
132
+ )
133
+ elif self.model == "fundamental":
134
+ R_est, t_est = estimate_pose_fundamental(
135
+ kpts1.cpu().numpy(),
136
+ kpts2.cpu().numpy(),
137
+ w1,
138
+ h1,
139
+ K1,
140
+ w2,
141
+ h2,
142
+ K2,
143
+ threshold,
144
+ )
145
+ T1_to_2_est = np.concatenate((R_est, t_est[:, None]), axis=-1)
146
+ e_t, e_R = compute_pose_error(T1_to_2_est, R, t)
147
+ e_pose = max(e_t, e_R)
148
+ tot_e_pose.append(e_pose)
149
+ return self.compute_auc(np.array(tot_e_pose))
150
+
151
+
152
+ class ScanNet1500(ScanNetBenchmark):
153
+ def _post_init(self):
154
+ self.test_pairs = osp.join(self.data_root, "test.npz")
155
+ self.benchmark_name = "ScanNet1500"
156
+ self.model = "essential"
157
+
158
+
159
+ class ScanNet1500_F(ScanNetBenchmark):
160
+ def _post_init(self):
161
+ self.test_pairs = osp.join(self.data_root, "test.npz")
162
+ self.benchmark_name = "ScanNet1500_F"
163
+ self.model = "fundamental"
imcui/third_party/dad/dad/checkpoint.py ADDED
@@ -0,0 +1,61 @@
1
+ import torch
2
+ from torch.nn.parallel.data_parallel import DataParallel
3
+ from torch.nn.parallel.distributed import DistributedDataParallel
4
+ import gc
5
+ from pathlib import Path
6
+ import dad
7
+ from dad.types import Detector
8
+
9
+ class CheckPoint:
10
+ def __init__(self, dir):
11
+ self.dir = Path(dir)
12
+ self.dir.mkdir(parents=True, exist_ok=True)
13
+
14
+ def save(
15
+ self,
16
+ model: Detector,
17
+ optimizer,
18
+ lr_scheduler,
19
+ n,
20
+ ):
21
+ assert model is not None
22
+ if isinstance(model, (DataParallel, DistributedDataParallel)):
23
+ model = model.module
24
+ states = {
25
+ "model": model.state_dict(),
26
+ "n": n,
27
+ "optimizer": optimizer.state_dict(),
28
+ "lr_scheduler": lr_scheduler.state_dict(),
29
+ }
30
+ torch.save(states, self.dir / "model_latest.pth")
31
+ dad.logger.info(f"Saved states {list(states.keys())}, at step {n}")
32
+
33
+ def load(
34
+ self,
35
+ model: Detector,
36
+ optimizer,
37
+ lr_scheduler,
38
+ n,
39
+ ):
40
+ if not (self.dir / "model_latest.pth").exists():
41
+ return model, optimizer, lr_scheduler, n
42
+
43
+ states = torch.load(self.dir / "model_latest.pth")
44
+ if "model" in states:
45
+ model.load_state_dict(states["model"])
46
+ if "n" in states:
47
+ n = states["n"] if states["n"] else n
48
+ if "optimizer" in states:
49
+ try:
50
+ optimizer.load_state_dict(states["optimizer"])
51
+ except Exception as e:
52
+ dad.logger.warning(
53
+ f"Failed to load states for optimizer, with error {e}"
54
+ )
55
+ if "lr_scheduler" in states:
56
+ lr_scheduler.load_state_dict(states["lr_scheduler"])
57
+ dad.logger.info(f"Loaded states {list(states.keys())}, at step {n}")
58
+ del states
59
+ gc.collect()
60
+ torch.cuda.empty_cache()
61
+ return model, optimizer, lr_scheduler, n
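
A minimal sketch of how `CheckPoint` is presumably wired into a training loop. The optimizer and scheduler choices here are illustrative assumptions; only the `CheckPoint` API comes from the code above:

```python
# Illustrative only: resume-or-start logic around CheckPoint.
import torch

from dad.checkpoint import CheckPoint
from dad.detectors import load_DaD

model = load_DaD(pretrained=False)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100_000)
checkpointer = CheckPoint("workspace/checkpoints")  # placeholder directory

# resume from model_latest.pth if it exists, otherwise start at step 0
model, optimizer, lr_scheduler, step = checkpointer.load(
    model, optimizer, lr_scheduler, n=0
)
# ... training steps would go here ...
checkpointer.save(model, optimizer, lr_scheduler, n=step)
```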
imcui/third_party/dad/dad/datasets/__init__.py ADDED
File without changes
imcui/third_party/dad/dad/datasets/megadepth.py ADDED
@@ -0,0 +1,312 @@
1
+ import os
2
+ from PIL import Image
3
+ import h5py
4
+ import math
5
+ import numpy as np
6
+ import torch
7
+ import torchvision.transforms.functional as tvf
8
+ from tqdm import tqdm
9
+
10
+ import dad
11
+ from dad.augs import (
12
+ get_tuple_transform_ops,
13
+ get_depth_tuple_transform_ops,
14
+ )
15
+ from torch.utils.data import ConcatDataset
16
+
17
+
18
+ class MegadepthScene:
19
+ def __init__(
20
+ self,
21
+ data_root,
22
+ scene_info,
23
+ scene_name=None,
24
+ min_overlap=0.0,
25
+ max_overlap=1.0,
26
+ image_size=640,
27
+ normalize=True,
28
+ shake_t=32,
29
+ rot_360=False,
30
+ max_num_pairs=100_000,
31
+ ) -> None:
32
+ self.data_root = data_root
33
+ self.scene_name = (
34
+ os.path.splitext(scene_name)[0] + f"_{min_overlap}_{max_overlap}"
35
+ )
36
+ self.image_paths = scene_info["image_paths"]
37
+ self.depth_paths = scene_info["depth_paths"]
38
+ self.intrinsics = scene_info["intrinsics"]
39
+ self.poses = scene_info["poses"]
40
+ self.pairs = scene_info["pairs"]
41
+ self.overlaps = scene_info["overlaps"]
42
+ threshold = (self.overlaps > min_overlap) & (self.overlaps < max_overlap)
43
+ self.pairs = self.pairs[threshold]
44
+ self.overlaps = self.overlaps[threshold]
45
+ if len(self.pairs) > max_num_pairs:
46
+ pairinds = np.random.choice(
47
+ np.arange(0, len(self.pairs)), max_num_pairs, replace=False
48
+ )
49
+ self.pairs = self.pairs[pairinds]
50
+ self.overlaps = self.overlaps[pairinds]
51
+ self.im_transform_ops = get_tuple_transform_ops(
52
+ resize=(image_size, image_size),
53
+ normalize=normalize,
54
+ )
55
+ self.depth_transform_ops = get_depth_tuple_transform_ops(
56
+ resize=(image_size, image_size), normalize=False
57
+ )
58
+ self.image_size = image_size
59
+ self.shake_t = shake_t
60
+ self.rot_360 = rot_360
61
+
62
+ def load_im(self, im_B, crop=None):
63
+ im = Image.open(im_B)
64
+ return im
65
+
66
+ def rot_360_deg(self, im, depth, K, angle):
67
+ C, H, W = im.shape
68
+ im = tvf.rotate(im, angle, expand=True)
69
+ depth = tvf.rotate(depth, angle, expand=True)
70
+ radians = angle * math.pi / 180
71
+ rot_mat = torch.tensor(
72
+ [
73
+ [math.cos(radians), math.sin(radians), 0],
74
+ [-math.sin(radians), math.cos(radians), 0],
75
+ [0, 0, 1.0],
76
+ ]
77
+ ).to(K.device)
78
+ t_mat = torch.tensor([[1, 0, W / 2], [0, 1, H / 2], [0, 0, 1.0]]).to(K.device)
79
+ neg_t_mat = torch.tensor([[1, 0, -W / 2], [0, 1, -H / 2], [0, 0, 1.0]]).to(
80
+ K.device
81
+ )
82
+ transform = t_mat @ rot_mat @ neg_t_mat
83
+ K = transform @ K
84
+ return im, depth, K, transform
85
+
86
+ def load_depth(self, depth_ref, crop=None):
87
+ depth = np.array(h5py.File(depth_ref, "r")["depth"])
88
+ return torch.from_numpy(depth)
89
+
90
+ def __len__(self):
91
+ return len(self.pairs)
92
+
93
+ def scale_intrinsic(self, K, wi, hi):
94
+ sx, sy = self.image_size / wi, self.image_size / hi
95
+ sK = torch.tensor([[sx, 0, 0], [0, sy, 0], [0, 0, 1]])
96
+ return sK @ K
97
+
98
+ def rand_shake(self, *things):
99
+ t = np.random.choice(range(-self.shake_t, self.shake_t + 1), size=(2))
100
+ return [
101
+ tvf.affine(thing, angle=0.0, translate=list(t), scale=1.0, shear=[0.0, 0.0])
102
+ for thing in things
103
+ ], t
104
+
105
+ def __getitem__(self, pair_idx):
106
+ try:
107
+ # read intrinsics of original size
108
+ idx1, idx2 = self.pairs[pair_idx]
109
+ K1 = torch.tensor(self.intrinsics[idx1].copy(), dtype=torch.float).reshape(
110
+ 3, 3
111
+ )
112
+ K2 = torch.tensor(self.intrinsics[idx2].copy(), dtype=torch.float).reshape(
113
+ 3, 3
114
+ )
115
+
116
+ # read and compute relative poses
117
+ T1 = self.poses[idx1]
118
+ T2 = self.poses[idx2]
119
+ T_1to2 = torch.tensor(np.matmul(T2, np.linalg.inv(T1)), dtype=torch.float)[
120
+ :4, :4
121
+ ] # (4, 4)
122
+
123
+ # Load positive pair data
124
+ im_A, im_B = self.image_paths[idx1], self.image_paths[idx2]
125
+ depth1, depth2 = self.depth_paths[idx1], self.depth_paths[idx2]
126
+ im_A_ref = os.path.join(self.data_root, im_A)
127
+ im_B_ref = os.path.join(self.data_root, im_B)
128
+ depth_A_ref = os.path.join(self.data_root, depth1)
129
+ depth_B_ref = os.path.join(self.data_root, depth2)
130
+ im_A: Image.Image = self.load_im(im_A_ref)
131
+ im_B: Image.Image = self.load_im(im_B_ref)
132
+ depth_A = self.load_depth(depth_A_ref)
133
+ depth_B = self.load_depth(depth_B_ref)
134
+
135
+ # Recompute camera intrinsic matrix due to the resize
136
+ W_A, H_A = im_A.width, im_A.height
137
+ W_B, H_B = im_B.width, im_B.height
138
+
139
+ K1 = self.scale_intrinsic(K1, W_A, H_A)
140
+ K2 = self.scale_intrinsic(K2, W_B, H_B)
141
+
142
+ # Process images
143
+ im_A, im_B = self.im_transform_ops((im_A, im_B))
144
+ depth_A, depth_B = self.depth_transform_ops(
145
+ (depth_A[None, None], depth_B[None, None])
146
+ )
147
+ [im_A, depth_A], t_A = self.rand_shake(im_A, depth_A)
148
+ [im_B, depth_B], t_B = self.rand_shake(im_B, depth_B)
149
+
150
+ K1[:2, 2] += t_A
151
+ K2[:2, 2] += t_B
152
+
153
+ if self.rot_360:
154
+ angle_A = np.random.choice([-90, 0, 90, 180])
155
+ angle_B = np.random.choice([-90, 0, 90, 180])
156
+ angle_A, angle_B = int(angle_A), int(angle_B)
157
+ im_A, depth_A, K1, _ = self.rot_360_deg(
158
+ im_A, depth_A, K1, angle=angle_A
159
+ )
160
+ im_B, depth_B, K2, _ = self.rot_360_deg(
161
+ im_B, depth_B, K2, angle=angle_B
162
+ )
163
+ else:
164
+ angle_A = 0
165
+ angle_B = 0
166
+ data_dict = {
167
+ "im_A": im_A,
168
+ "im_A_identifier": self.image_paths[idx1]
169
+ .split("/")[-1]
170
+ .split(".jpg")[0],
171
+ "im_B": im_B,
172
+ "im_B_identifier": self.image_paths[idx2]
173
+ .split("/")[-1]
174
+ .split(".jpg")[0],
175
+ "im_A_depth": depth_A[0, 0],
176
+ "im_B_depth": depth_B[0, 0],
177
+ "pose_A": T1,
178
+ "pose_B": T2,
179
+ "K1": K1,
180
+ "K2": K2,
181
+ "T_1to2": T_1to2,
182
+ "im_A_path": im_A_ref,
183
+ "im_B_path": im_B_ref,
184
+ "angle_A": angle_A,
185
+ "angle_B": angle_B,
186
+ }
187
+ except Exception as e:
188
+ dad.logger.warning(e)
189
+ dad.logger.warning(f"Failed to load image pair {self.pairs[pair_idx]}")
190
+ dad.logger.warning("Loading a random pair in scene instead")
191
+ rand_ind = np.random.choice(range(len(self)))
192
+ return self[rand_ind]
193
+ return data_dict
194
+
195
+
196
+ class MegadepthBuilder:
197
+ def __init__(self, data_root, loftr_ignore=True, imc21_ignore=True) -> None:
198
+ self.data_root = data_root
199
+ self.scene_info_root = os.path.join(data_root, "prep_scene_info")
200
+ self.all_scenes = os.listdir(self.scene_info_root)
201
+ self.test_scenes = ["0017.npy", "0004.npy", "0048.npy", "0013.npy"]
202
+ # LoFTR did the D2-net preprocessing differently than we did and got more ignore scenes, can optionally ignore those
203
+ self.loftr_ignore_scenes = set(
204
+ [
205
+ "0121.npy",
206
+ "0133.npy",
207
+ "0168.npy",
208
+ "0178.npy",
209
+ "0229.npy",
210
+ "0349.npy",
211
+ "0412.npy",
212
+ "0430.npy",
213
+ "0443.npy",
214
+ "1001.npy",
215
+ "5014.npy",
216
+ "5015.npy",
217
+ "5016.npy",
218
+ ]
219
+ )
220
+ self.imc21_scenes = set(
221
+ [
222
+ "0008.npy",
223
+ "0019.npy",
224
+ "0021.npy",
225
+ "0024.npy",
226
+ "0025.npy",
227
+ "0032.npy",
228
+ "0063.npy",
229
+ "1589.npy",
230
+ ]
231
+ )
232
+ self.test_scenes_loftr = ["0015.npy", "0022.npy"]
233
+ self.loftr_ignore = loftr_ignore
234
+ self.imc21_ignore = imc21_ignore
235
+
236
+ def build_scenes(self, split, **kwargs):
237
+ if split == "train":
238
+ scene_names = set(self.all_scenes) - set(self.test_scenes)
239
+ elif split == "train_loftr":
240
+ scene_names = set(self.all_scenes) - set(self.test_scenes_loftr)
241
+ elif split == "test":
242
+ scene_names = self.test_scenes
243
+ elif split == "test_loftr":
244
+ scene_names = self.test_scenes_loftr
245
+ elif split == "all_scenes":
246
+ scene_names = self.all_scenes
247
+ elif split == "custom":
248
+ scene_names = kwargs.pop("scene_names")  # assumption: the custom split receives scene_names via kwargs
249
+ else:
250
+ raise ValueError(f"Split {split} not available")
251
+ scenes = []
252
+ for scene_name in tqdm(scene_names):
253
+ if self.loftr_ignore and scene_name in self.loftr_ignore_scenes:
254
+ continue
255
+ if self.imc21_ignore and scene_name in self.imc21_scenes:
256
+ continue
257
+ if ".npy" not in scene_name:
258
+ continue
259
+ scene_info = np.load(
260
+ os.path.join(self.scene_info_root, scene_name), allow_pickle=True
261
+ ).item()
262
+
263
+ scenes.append(
264
+ MegadepthScene(
265
+ self.data_root,
266
+ scene_info,
267
+ scene_name=scene_name,
268
+ **kwargs,
269
+ )
270
+ )
271
+ return scenes
272
+
273
+ def weight_scenes(self, concat_dataset, alpha=0.5):
274
+ ns = []
275
+ for d in concat_dataset.datasets:
276
+ ns.append(len(d))
277
+ ws = torch.cat([torch.ones(n) / n**alpha for n in ns])
278
+ return ws
279
+
280
+ def dedode_train_split(self, **kwargs):
281
+ megadepth_train1 = self.build_scenes(
282
+ split="train_loftr", min_overlap=0.01, **kwargs
283
+ )
284
+ megadepth_train2 = self.build_scenes(
285
+ split="train_loftr", min_overlap=0.35, **kwargs
286
+ )
287
+
288
+ megadepth_train = ConcatDataset(megadepth_train1 + megadepth_train2)
289
+ return megadepth_train
290
+
291
+ def hard_train_split(self, **kwargs):
292
+ megadepth_train = self.build_scenes(
293
+ split="train_loftr", min_overlap=0.01, **kwargs
294
+ )
295
+ megadepth_train = ConcatDataset(megadepth_train)
296
+ return megadepth_train
297
+
298
+ def easy_train_split(self, **kwargs):
299
+ megadepth_train = self.build_scenes(
300
+ split="train_loftr", min_overlap=0.35, **kwargs
301
+ )
302
+ megadepth_train = ConcatDataset(megadepth_train)
303
+ return megadepth_train
304
+
305
+ def dedode_test_split(self, **kwargs):
306
+ megadepth_test = self.build_scenes(
307
+ split="test_loftr",
308
+ min_overlap=0.01,
309
+ **kwargs,
310
+ )
311
+ megadepth_test = ConcatDataset(megadepth_test)
312
+ return megadepth_test
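
A sketch of how `MegadepthBuilder` might be used to assemble a training set and sample scenes with the weights from `weight_scenes`; the dataloader wiring and the `alpha` value are assumptions, only the builder API is from the code above:

```python
# Sketch, not from the diff: DeDoDe-style MegaDepth train split with scene-weighted sampling.
import torch

from dad.datasets.megadepth import MegadepthBuilder

builder = MegadepthBuilder(data_root="data/megadepth", loftr_ignore=True, imc21_ignore=True)
train_set = builder.dedode_train_split(image_size=640, shake_t=32)
weights = builder.weight_scenes(train_set, alpha=0.75)  # alpha is an assumed value

sampler = torch.utils.data.WeightedRandomSampler(
    weights, num_samples=8 * 1000, replacement=False
)
loader = torch.utils.data.DataLoader(train_set, batch_size=8, sampler=sampler)
```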
imcui/third_party/dad/dad/detectors/__init__.py ADDED
@@ -0,0 +1,50 @@
1
+ from .dedode_detector import load_DaD as load_DaD
2
+ from .dedode_detector import load_DaDDark as load_DaDDark
3
+ from .dedode_detector import load_DaDLight as load_DaDLight
4
+ from .dedode_detector import dedode_detector_S as dedode_detector_S
5
+ from .dedode_detector import dedode_detector_B as dedode_detector_B
6
+ from .dedode_detector import dedode_detector_L as dedode_detector_L
7
+ from .dedode_detector import load_dedode_v2 as load_dedode_v2
8
+
9
+
10
+ lg_detectors = ["ALIKED", "ALIKEDROT", "SIFT", "DISK", "SuperPoint", "ReinforcedFP"]
11
+ other_detectors = ["HesAff", "HarrisAff", "REKD"]
12
+ dedode_detectors = [
13
+ "DeDoDe-v2",
14
+ "DaD",
15
+ "DaDLight",
16
+ "DaDDark",
17
+ ]
18
+ all_detectors = lg_detectors + dedode_detectors + other_detectors
19
+
20
+
21
+ def load_detector_by_name(detector_name, *, resize=1024, weights_path=None):
22
+ if detector_name == "DaD":
23
+ detector = load_DaD(resize=resize, weights_path=weights_path)
24
+ elif detector_name == "DaDLight":
25
+ detector = load_DaDLight(resize=resize, weights_path=weights_path)
26
+ elif detector_name == "DaDDark":
27
+ detector = load_DaDDark(resize=resize, weights_path=weights_path)
28
+ elif detector_name == "DeDoDe-v2":
29
+ detector = load_dedode_v2()
30
+ elif detector_name in lg_detectors:
31
+ from .third_party import lightglue, LightGlueDetector
32
+
33
+ detector = LightGlueDetector(
34
+ getattr(lightglue, detector_name), detection_threshold=0, resize=resize
35
+ )
36
+ elif detector_name == "HesAff":
37
+ from .third_party import HesAff
38
+
39
+ detector = HesAff()
40
+ elif detector_name == "HarrisAff":
41
+ from .third_party import HarrisAff
42
+
43
+ detector = HarrisAff()
44
+ elif detector_name == "REKD":
45
+ from .third_party import load_REKD
46
+
47
+ detector = load_REKD(resize=resize)
48
+ else:
49
+ raise ValueError(f"Couldn't find detector with detector name {detector_name}")
50
+ return detector
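
A quick usage sketch for the factory above. `detect_from_path` comes from the `dad.types.Detector` base class, as used throughout the benchmarks; the image path is a placeholder:

```python
# Example use of the detector factory above; "assets/im_A.jpg" is a placeholder path.
from dad.detectors import load_detector_by_name

detector = load_detector_by_name("DaD", resize=1024)
detections = detector.detect_from_path("assets/im_A.jpg", num_keypoints=512)
keypoints = detections["keypoints"][0]  # normalized coordinates, as consumed by the benchmarks
```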
imcui/third_party/dad/dad/detectors/dedode_detector.py ADDED
@@ -0,0 +1,559 @@
1
+ import numpy as np
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+ import torchvision.models as tvm
7
+ import torchvision.transforms as transforms
8
+ from PIL import Image
9
+ from dad.utils import get_best_device, sample_keypoints, check_not_i16
10
+
11
+ from dad.types import Detector
12
+
13
+
14
+ class DeDoDeDetector(Detector):
15
+ def __init__(
16
+ self,
17
+ *args,
18
+ encoder: nn.Module,
19
+ decoder: nn.Module,
20
+ resize: int,
21
+ nms_size: int,
22
+ subpixel: bool,
23
+ subpixel_temp: float,
24
+ keep_aspect_ratio: bool,
25
+ remove_borders: bool,
26
+ increase_coverage: bool,
27
+ coverage_pow: float,
28
+ coverage_size: int,
29
+ **kwargs,
30
+ ) -> None:
31
+ super().__init__(*args, **kwargs)
32
+ self.normalizer = transforms.Normalize(
33
+ mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
34
+ )
35
+ self.encoder = encoder
36
+ self.decoder = decoder
37
+ self.remove_borders = remove_borders
38
+ self.resize = resize
39
+ self.increase_coverage = increase_coverage
40
+ self.coverage_pow = coverage_pow
41
+ self.coverage_size = coverage_size
42
+ self.nms_size = nms_size
43
+ self.keep_aspect_ratio = keep_aspect_ratio
44
+ self.subpixel = subpixel
45
+ self.subpixel_temp = subpixel_temp
46
+
47
+ @property
48
+ def topleft(self):
49
+ return 0.5
50
+
51
+ def forward_impl(
52
+ self,
53
+ images,
54
+ ):
55
+ features, sizes = self.encoder(images)
56
+ logits = 0
57
+ context = None
58
+ scales = ["8", "4", "2", "1"]
59
+ for idx, (feature_map, scale) in enumerate(zip(reversed(features), scales)):
60
+ delta_logits, context = self.decoder(
61
+ feature_map, context=context, scale=scale
62
+ )
63
+ logits = (
64
+ logits + delta_logits.float()
65
+ ) # ensure float (F.interpolate does not support bf16)
66
+ if idx < len(scales) - 1:
67
+ size = sizes[-(idx + 2)]
68
+ logits = F.interpolate(
69
+ logits, size=size, mode="bicubic", align_corners=False
70
+ )
71
+ context = F.interpolate(
72
+ context.float(), size=size, mode="bilinear", align_corners=False
73
+ )
74
+ return logits.float()
75
+
76
+ def forward(self, batch) -> dict[str, torch.Tensor]:
77
+ # wraps internal forward impl to handle
78
+ # different types of batches etc.
79
+ if "im_A" in batch:
80
+ images = torch.cat((batch["im_A"], batch["im_B"]))
81
+ else:
82
+ images = batch["image"]
83
+ scoremap = self.forward_impl(images)
84
+ return {"scoremap": scoremap}
85
+
86
+ @torch.inference_mode()
87
+ def detect(
88
+ self, batch, *, num_keypoints, return_dense_probs=False
89
+ ) -> dict[str, torch.Tensor]:
90
+ self.train(False)
91
+ scoremap = self.forward(batch)["scoremap"]
92
+ B, K, H, W = scoremap.shape
93
+ dense_probs = (
94
+ scoremap.reshape(B, K * H * W)
95
+ .softmax(dim=-1)
96
+ .reshape(B, K, H * W)
97
+ .sum(dim=1)
98
+ )
99
+ dense_probs = dense_probs.reshape(B, H, W)
100
+ keypoints, confidence = sample_keypoints(
101
+ dense_probs,
102
+ use_nms=True,
103
+ nms_size=self.nms_size,
104
+ sample_topk=True,
105
+ num_samples=num_keypoints,
106
+ return_probs=True,
107
+ increase_coverage=self.increase_coverage,
108
+ remove_borders=self.remove_borders,
109
+ coverage_pow=self.coverage_pow,
110
+ coverage_size=self.coverage_size,
111
+ subpixel=self.subpixel,
112
+ subpixel_temp=self.subpixel_temp,
113
+ scoremap=scoremap.reshape(B, H, W),
114
+ )
115
+ result = {"keypoints": keypoints, "keypoint_probs": confidence}
116
+ if return_dense_probs:
117
+ result["dense_probs"] = dense_probs
118
+ return result
119
+
120
+ def load_image(self, im_path, device=get_best_device()) -> dict[str, torch.Tensor]:
121
+ pil_im = Image.open(im_path)
122
+ check_not_i16(pil_im)
123
+ pil_im = pil_im.convert("RGB")
124
+ if self.keep_aspect_ratio:
125
+ W, H = pil_im.size
126
+ scale = self.resize / max(W, H)
127
+ W = int((scale * W) // 8 * 8)
128
+ H = int((scale * H) // 8 * 8)
129
+ else:
130
+ H, W = self.resize, self.resize
131
+ pil_im = pil_im.resize((W, H))
132
+ standard_im = np.array(pil_im) / 255.0
133
+ return {
134
+ "image": self.normalizer(torch.from_numpy(standard_im).permute(2, 0, 1))
135
+ .float()
136
+ .to(device)[None]
137
+ }
138
+
139
+
140
+ class Decoder(nn.Module):
141
+ def __init__(
142
+ self, layers, *args, super_resolution=False, num_prototypes=1, **kwargs
143
+ ) -> None:
144
+ super().__init__(*args, **kwargs)
145
+ self.layers = layers
146
+ self.scales = self.layers.keys()
147
+ self.super_resolution = super_resolution
148
+ self.num_prototypes = num_prototypes
149
+
150
+ def forward(self, features, context=None, scale=None):
151
+ if context is not None:
152
+ features = torch.cat((features, context), dim=1)
153
+ stuff = self.layers[scale](features)
154
+ logits, context = (
155
+ stuff[:, : self.num_prototypes],
156
+ stuff[:, self.num_prototypes :],
157
+ )
158
+ return logits, context
159
+
160
+
161
+ class ConvRefiner(nn.Module):
162
+ def __init__(
163
+ self,
164
+ in_dim=6,
165
+ hidden_dim=16,
166
+ out_dim=2,
167
+ dw=True,
168
+ kernel_size=5,
169
+ hidden_blocks=5,
170
+ amp=True,
171
+ residual=False,
172
+ amp_dtype=torch.float16,
173
+ ):
174
+ super().__init__()
175
+ self.block1 = self.create_block(
176
+ in_dim,
177
+ hidden_dim,
178
+ dw=False,
179
+ kernel_size=1,
180
+ )
181
+ self.hidden_blocks = nn.Sequential(
182
+ *[
183
+ self.create_block(
184
+ hidden_dim,
185
+ hidden_dim,
186
+ dw=dw,
187
+ kernel_size=kernel_size,
188
+ )
189
+ for hb in range(hidden_blocks)
190
+ ]
191
+ )
192
+ self.hidden_blocks = self.hidden_blocks
193
+ self.out_conv = nn.Conv2d(hidden_dim, out_dim, 1, 1, 0)
194
+ self.amp = amp
195
+ self.amp_dtype = amp_dtype
196
+ self.residual = residual
197
+
198
+ def create_block(
199
+ self,
200
+ in_dim,
201
+ out_dim,
202
+ dw=True,
203
+ kernel_size=5,
204
+ bias=True,
205
+ norm_type=nn.BatchNorm2d,
206
+ ):
207
+ num_groups = 1 if not dw else in_dim
208
+ if dw:
209
+ assert out_dim % in_dim == 0, (
210
+ "outdim must be divisible by indim for depthwise"
211
+ )
212
+ conv1 = nn.Conv2d(
213
+ in_dim,
214
+ out_dim,
215
+ kernel_size=kernel_size,
216
+ stride=1,
217
+ padding=kernel_size // 2,
218
+ groups=num_groups,
219
+ bias=bias,
220
+ )
221
+ norm = (
222
+ norm_type(out_dim)
223
+ if norm_type is nn.BatchNorm2d
224
+ else norm_type(num_channels=out_dim)
225
+ )
226
+ relu = nn.ReLU(inplace=True)
227
+ conv2 = nn.Conv2d(out_dim, out_dim, 1, 1, 0)
228
+ return nn.Sequential(conv1, norm, relu, conv2)
229
+
230
+ def forward(self, feats):
231
+ b, c, hs, ws = feats.shape
232
+ with torch.autocast(device_type=feats.device.type, enabled=self.amp, dtype=self.amp_dtype):
233
+ x0 = self.block1(feats)
234
+ x = self.hidden_blocks(x0)
235
+ if self.residual:
236
+ x = (x + x0) / 1.4
237
+ x = self.out_conv(x)
238
+ return x
239
+
240
+
241
+ class VGG19(nn.Module):
242
+ def __init__(self, amp=False, amp_dtype=torch.float16) -> None:
243
+ super().__init__()
244
+ self.layers = nn.ModuleList(tvm.vgg19_bn().features[:40])
245
+ # Maxpool layers: 6, 13, 26, 39
246
+ self.amp = amp
247
+ self.amp_dtype = amp_dtype
248
+
249
+ def forward(self, x, **kwargs):
250
+ with torch.autocast(device_type=x.device.type, enabled=self.amp, dtype=self.amp_dtype):
251
+ feats = []
252
+ sizes = []
253
+ for layer in self.layers:
254
+ if isinstance(layer, nn.MaxPool2d):
255
+ feats.append(x)
256
+ sizes.append(x.shape[-2:])
257
+ x = layer(x)
258
+ return feats, sizes
259
+
260
+
261
+ class VGG(nn.Module):
262
+ def __init__(self, size="19", amp=False, amp_dtype=torch.float16) -> None:
263
+ super().__init__()
264
+ if size == "11":
265
+ self.layers = nn.ModuleList(tvm.vgg11_bn().features[:22])
266
+ elif size == "13":
267
+ self.layers = nn.ModuleList(tvm.vgg13_bn().features[:28])
268
+ elif size == "19":
269
+ self.layers = nn.ModuleList(tvm.vgg19_bn().features[:40])
270
+ # Maxpool layers: 6, 13, 26, 39
271
+ self.amp = amp
272
+ self.amp_dtype = amp_dtype
273
+
274
+ def forward(self, x, **kwargs):
275
+ with torch.autocast(device_type=x.device.type, enabled=self.amp, dtype=self.amp_dtype):
276
+ feats = []
277
+ sizes = []
278
+ for layer in self.layers:
279
+ if isinstance(layer, nn.MaxPool2d):
280
+ feats.append(x)
281
+ sizes.append(x.shape[-2:])
282
+ x = layer(x)
283
+ return feats, sizes
284
+
285
+
286
+ def dedode_detector_S():
287
+ residual = True
288
+ hidden_blocks = 3
289
+ amp_dtype = torch.float16
290
+ amp = True
291
+ NUM_PROTOTYPES = 1
292
+ conv_refiner = nn.ModuleDict(
293
+ {
294
+ "8": ConvRefiner(
295
+ 512,
296
+ 512,
297
+ 256 + NUM_PROTOTYPES,
298
+ hidden_blocks=hidden_blocks,
299
+ residual=residual,
300
+ amp=amp,
301
+ amp_dtype=amp_dtype,
302
+ ),
303
+ "4": ConvRefiner(
304
+ 256 + 256,
305
+ 256,
306
+ 128 + NUM_PROTOTYPES,
307
+ hidden_blocks=hidden_blocks,
308
+ residual=residual,
309
+ amp=amp,
310
+ amp_dtype=amp_dtype,
311
+ ),
312
+ "2": ConvRefiner(
313
+ 128 + 128,
314
+ 64,
315
+ 32 + NUM_PROTOTYPES,
316
+ hidden_blocks=hidden_blocks,
317
+ residual=residual,
318
+ amp=amp,
319
+ amp_dtype=amp_dtype,
320
+ ),
321
+ "1": ConvRefiner(
322
+ 64 + 32,
323
+ 32,
324
+ 1 + NUM_PROTOTYPES,
325
+ hidden_blocks=hidden_blocks,
326
+ residual=residual,
327
+ amp=amp,
328
+ amp_dtype=amp_dtype,
329
+ ),
330
+ }
331
+ )
332
+ encoder = VGG(size="11", amp=amp, amp_dtype=amp_dtype)
333
+ decoder = Decoder(conv_refiner)
334
+ return encoder, decoder
335
+
336
+
337
+ def dedode_detector_B():
338
+ residual = True
339
+ hidden_blocks = 5
340
+ amp_dtype = torch.float16
341
+ amp = True
342
+ NUM_PROTOTYPES = 1
343
+ conv_refiner = nn.ModuleDict(
344
+ {
345
+ "8": ConvRefiner(
346
+ 512,
347
+ 512,
348
+ 256 + NUM_PROTOTYPES,
349
+ hidden_blocks=hidden_blocks,
350
+ residual=residual,
351
+ amp=amp,
352
+ amp_dtype=amp_dtype,
353
+ ),
354
+ "4": ConvRefiner(
355
+ 256 + 256,
356
+ 256,
357
+ 128 + NUM_PROTOTYPES,
358
+ hidden_blocks=hidden_blocks,
359
+ residual=residual,
360
+ amp=amp,
361
+ amp_dtype=amp_dtype,
362
+ ),
363
+ "2": ConvRefiner(
364
+ 128 + 128,
365
+ 64,
366
+ 32 + NUM_PROTOTYPES,
367
+ hidden_blocks=hidden_blocks,
368
+ residual=residual,
369
+ amp=amp,
370
+ amp_dtype=amp_dtype,
371
+ ),
372
+ "1": ConvRefiner(
373
+ 64 + 32,
374
+ 32,
375
+ 1 + NUM_PROTOTYPES,
376
+ hidden_blocks=hidden_blocks,
377
+ residual=residual,
378
+ amp=amp,
379
+ amp_dtype=amp_dtype,
380
+ ),
381
+ }
382
+ )
383
+ encoder = VGG19(amp=amp, amp_dtype=amp_dtype)
384
+ decoder = Decoder(conv_refiner)
385
+ return encoder, decoder
386
+
387
+
388
+ def dedode_detector_L():
389
+ NUM_PROTOTYPES = 1
390
+ residual = True
391
+ hidden_blocks = 8
392
+ amp_dtype = (
393
+ torch.float16
394
+ ) # torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
395
+ amp = True
396
+ conv_refiner = nn.ModuleDict(
397
+ {
398
+ "8": ConvRefiner(
399
+ 512,
400
+ 512,
401
+ 256 + NUM_PROTOTYPES,
402
+ hidden_blocks=hidden_blocks,
403
+ residual=residual,
404
+ amp=amp,
405
+ amp_dtype=amp_dtype,
406
+ ),
407
+ "4": ConvRefiner(
408
+ 256 + 256,
409
+ 256,
410
+ 128 + NUM_PROTOTYPES,
411
+ hidden_blocks=hidden_blocks,
412
+ residual=residual,
413
+ amp=amp,
414
+ amp_dtype=amp_dtype,
415
+ ),
416
+ "2": ConvRefiner(
417
+ 128 + 128,
418
+ 128,
419
+ 64 + NUM_PROTOTYPES,
420
+ hidden_blocks=hidden_blocks,
421
+ residual=residual,
422
+ amp=amp,
423
+ amp_dtype=amp_dtype,
424
+ ),
425
+ "1": ConvRefiner(
426
+ 64 + 64,
427
+ 64,
428
+ 1 + NUM_PROTOTYPES,
429
+ hidden_blocks=hidden_blocks,
430
+ residual=residual,
431
+ amp=amp,
432
+ amp_dtype=amp_dtype,
433
+ ),
434
+ }
435
+ )
436
+ encoder = VGG19(amp=amp, amp_dtype=amp_dtype)
437
+ decoder = Decoder(conv_refiner)
438
+ return encoder, decoder
439
+
440
+
441
+ class DaD(DeDoDeDetector):
442
+ def __init__(
443
+ self,
444
+ encoder: nn.Module,
445
+ decoder: nn.Module,
446
+ *args,
447
+ resize=1024,
448
+ nms_size=3,
449
+ remove_borders=False,
450
+ increase_coverage=False,
451
+ coverage_pow=None,
452
+ coverage_size=None,
453
+ subpixel=True,
454
+ subpixel_temp=0.5,
455
+ keep_aspect_ratio=True,
456
+ **kwargs,
457
+ ) -> None:
458
+ super().__init__(
459
+ *args,
460
+ encoder=encoder,
461
+ decoder=decoder,
462
+ resize=resize,
463
+ nms_size=nms_size,
464
+ remove_borders=remove_borders,
465
+ increase_coverage=increase_coverage,
466
+ coverage_pow=coverage_pow,
467
+ coverage_size=coverage_size,
468
+ subpixel=subpixel,
469
+ keep_aspect_ratio=keep_aspect_ratio,
470
+ subpixel_temp=subpixel_temp,
471
+ **kwargs,
472
+ )
473
+
474
+
475
+ class DeDoDev2(DeDoDeDetector):
476
+ def __init__(
477
+ self,
478
+ encoder: nn.Module,
479
+ decoder: nn.Module,
480
+ *args,
481
+ resize=784,
482
+ nms_size=3,
483
+ remove_borders=False,
484
+ increase_coverage=True,
485
+ coverage_pow=0.5,
486
+ coverage_size=51,
487
+ subpixel=False,
488
+ subpixel_temp=None,
489
+ keep_aspect_ratio=False,
490
+ **kwargs,
491
+ ) -> None:
492
+ super().__init__(
493
+ *args,
494
+ encoder=encoder,
495
+ decoder=decoder,
496
+ resize=resize,
497
+ nms_size=nms_size,
498
+ remove_borders=remove_borders,
499
+ increase_coverage=increase_coverage,
500
+ coverage_pow=coverage_pow,
501
+ coverage_size=coverage_size,
502
+ subpixel=subpixel,
503
+ keep_aspect_ratio=keep_aspect_ratio,
504
+ subpixel_temp=subpixel_temp,
505
+ **kwargs,
506
+ )
507
+
508
+
509
+ def load_DaD(resize=1024, pretrained=True, weights_path=None) -> DaD:
510
+ if weights_path is None:
511
+ weights_path = (
512
+ "https://github.com/Parskatt/dad/releases/download/v0.1.0/dad.pth"
513
+ )
514
+ device = get_best_device()
515
+ encoder, decoder = dedode_detector_S()
516
+ model = DaD(encoder, decoder, resize=resize).to(device)
517
+ if pretrained:
518
+ weights = torch.hub.load_state_dict_from_url(
519
+ weights_path, weights_only=False, map_location=device
520
+ )
521
+ model.load_state_dict(weights)
522
+ return model
523
+
524
+
525
+ def load_DaDLight(resize=1024, weights_path=None) -> DaD:
526
+ if weights_path is None:
527
+ weights_path = (
528
+ "https://github.com/Parskatt/dad/releases/download/v0.1.0/dad_light.pth"
529
+ )
530
+ return load_DaD(
531
+ resize=resize,
532
+ pretrained=True,
533
+ weights_path=weights_path,
534
+ )
535
+
536
+
537
+ def load_DaDDark(resize=1024, weights_path=None) -> DaD:
538
+ if weights_path is None:
539
+ weights_path = (
540
+ "https://github.com/Parskatt/dad/releases/download/v0.1.0/dad_dark.pth"
541
+ )
542
+ return load_DaD(
543
+ resize=resize,
544
+ pretrained=True,
545
+ weights_path=weights_path,
546
+ )
547
+
548
+
549
+ def load_dedode_v2() -> DeDoDev2:
550
+ device = get_best_device()
551
+ weights = torch.hub.load_state_dict_from_url(
552
+ "https://github.com/Parskatt/DeDoDe/releases/download/v2/dedode_detector_L_v2.pth",
553
+ map_location=device,
554
+ )
555
+
556
+ encoder, decoder = dedode_detector_L()
557
+ model = DeDoDev2(encoder, decoder).to(device)
558
+ model.load_state_dict(weights)
559
+ return model
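
A minimal sketch of calling the DaD detector directly through `load_image`/`detect` instead of `detect_from_path`; the image path is a placeholder and the device follows `get_best_device` via `load_image`'s default:

```python
# Minimal direct use of the loader and detector defined above.
import torch

from dad.detectors.dedode_detector import load_DaD

detector = load_DaD(resize=1024)
batch = detector.load_image("assets/im_A.jpg")  # placeholder path
with torch.inference_mode():
    out = detector.detect(batch, num_keypoints=2048, return_dense_probs=True)

kpts = out["keypoints"][0]        # [N, 2] in normalized image coordinates
probs = out["keypoint_probs"][0]  # per-keypoint confidence
heat = out["dense_probs"]         # [1, H, W] keypoint probability map
```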
imcui/third_party/dad/dad/detectors/third_party/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ from .lightglue_detector import LightGlueDetector as LightGlueDetector
2
+ from .lightglue import SuperPoint as SuperPoint
3
+ from .lightglue import ReinforcedFP as ReinforcedFP
4
+ from .lightglue import DISK as DISK
5
+ from .lightglue import ALIKED as ALIKED
6
+ from .lightglue import ALIKEDROT as ALIKEDROT
7
+ from .lightglue import SIFT as SIFT
8
+ from .lightglue import DoGHardNet as DoGHardNet
9
+ from .hesaff import HesAff as HesAff
10
+ from .harrisaff import HarrisAff as HarrisAff
11
+ from .rekd.rekd import load_REKD as load_REKD
imcui/third_party/dad/dad/detectors/third_party/harrisaff.py ADDED
@@ -0,0 +1,35 @@
1
+ import numpy as np
2
+ import torch
3
+
4
+ from dad.types import Detector
5
+ import cv2
6
+
7
+ from dad.utils import get_best_device
8
+
9
+
10
+ class HarrisAff(Detector):
11
+ def __init__(self):
12
+ super().__init__()
13
+ self.detector = cv2.xfeatures2d.HarrisLaplaceFeatureDetector_create(
14
+ numOctaves=6, corn_thresh=0.0, DOG_thresh=0.0, maxCorners=8192, num_layers=4
15
+ )
16
+
17
+ @property
18
+ def topleft(self):
19
+ return 0.0
20
+
21
+ def load_image(self, im_path):
22
+ return {"image": cv2.imread(im_path, cv2.IMREAD_GRAYSCALE)}
23
+
24
+ @torch.inference_mode()
25
+ def detect(self, batch, *, num_keypoints, return_dense_probs=False) -> dict[str, torch.Tensor]:
26
+ img = batch["image"]
27
+ H, W = img.shape
28
+ # Detect keypoints
29
+ kps = self.detector.detect(img)
30
+ kps = np.array([kp.pt for kp in kps])[:num_keypoints]
31
+ kps_n = self.to_normalized_coords(torch.from_numpy(kps), H, W)[None]
32
+ detections = {"keypoints": kps_n.to(get_best_device()).float(), "keypoint_probs": None}
33
+ if return_dense_probs:
34
+ detections["dense_probs"] = None
35
+ return detections
imcui/third_party/dad/dad/detectors/third_party/hesaff.py ADDED
@@ -0,0 +1,40 @@
1
+ from PIL import Image
2
+
3
+ import torch
4
+
5
+ from dad.utils import get_best_device
6
+ from dad.types import Detector
7
+
8
+
9
+ class HesAff(Detector):
10
+ def __init__(self):
11
+ raise NotImplementedError("Buggy implementation, don't use.")
12
+ super().__init__()
13
+ import pyhesaff
14
+
15
+ self.params = pyhesaff.get_hesaff_default_params()
16
+
17
+ @property
18
+ def topleft(self):
19
+ return 0.0
20
+
21
+ def load_image(self, im_path):
22
+ # pyhesaff doesn't seem to have a decoupled image loading and detection stage
23
+ # so load_image here is just identity
24
+ return {"image": im_path}
25
+
26
+ def detect(self, batch, *, num_keypoints, return_dense_probs=False):
27
+ import pyhesaff
28
+
29
+ im_path = batch["image"]
30
+ W, H = Image.open(im_path).size
31
+ detections = pyhesaff.detect_feats(im_path)[0][:num_keypoints]
32
+ kps = detections[..., :2]
33
+ kps_n = self.to_normalized_coords(torch.from_numpy(kps), H, W)[None]
34
+ result = {
35
+ "keypoints": kps_n.to(get_best_device()).float(),
36
+ "keypoint_probs": None,
37
+ }
38
+ if return_dense_probs:
39
+ result["dense_probs"] = None
40
+ return result
imcui/third_party/dad/dad/detectors/third_party/lightglue/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ from .aliked import ALIKED # noqa
2
+ from .aliked import ALIKEDROT as ALIKEDROT # noqa
3
+ from .disk import DISK # noqa
4
+ from .dog_hardnet import DoGHardNet # noqa
5
+ from .lightglue import LightGlue # noqa
6
+ from .sift import SIFT # noqa
7
+ from .superpoint import SuperPoint # noqa
8
+ from .superpoint import ReinforcedFP # noqa
9
+ from .utils import match_pair # noqa
imcui/third_party/dad/dad/detectors/third_party/lightglue/aliked.py ADDED
@@ -0,0 +1,770 @@
1
+ # BSD 3-Clause License
2
+
3
+ # Copyright (c) 2022, Zhao Xiaoming
4
+ # All rights reserved.
5
+
6
+ # Redistribution and use in source and binary forms, with or without
7
+ # modification, are permitted provided that the following conditions are met:
8
+
9
+ # 1. Redistributions of source code must retain the above copyright notice, this
10
+ # list of conditions and the following disclaimer.
11
+
12
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ # this list of conditions and the following disclaimer in the documentation
14
+ # and/or other materials provided with the distribution.
15
+
16
+ # 3. Neither the name of the copyright holder nor the names of its
17
+ # contributors may be used to endorse or promote products derived from
18
+ # this software without specific prior written permission.
19
+
20
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24
+ # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28
+ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
+
31
+ # Authors:
32
+ # Xiaoming Zhao, Xingming Wu, Weihai Chen, Peter C.Y. Chen, Qingsong Xu, and Zhengguo Li
33
+ # Code from https://github.com/Shiaoming/ALIKED
34
+
35
+ from typing import Callable, Optional
36
+
37
+ import torch
38
+ import torch.nn.functional as F
39
+ import torchvision
40
+ from kornia.color import grayscale_to_rgb
41
+ from torch import nn
42
+ from torch.nn.modules.utils import _pair
43
+ from torchvision.models import resnet
44
+
45
+ from .utils import Extractor
46
+
47
+
48
+ def get_patches(
49
+ tensor: torch.Tensor, required_corners: torch.Tensor, ps: int
50
+ ) -> torch.Tensor:
51
+ c, h, w = tensor.shape
52
+ corner = (required_corners - ps / 2 + 1).long()
53
+ corner[:, 0] = corner[:, 0].clamp(min=0, max=w - 1 - ps)
54
+ corner[:, 1] = corner[:, 1].clamp(min=0, max=h - 1 - ps)
55
+ offset = torch.arange(0, ps)
56
+
57
+ kw = {"indexing": "ij"} if torch.__version__ >= "1.10" else {}
58
+ x, y = torch.meshgrid(offset, offset, **kw)
59
+ patches = torch.stack((x, y)).permute(2, 1, 0).unsqueeze(2)
60
+ patches = patches.to(corner) + corner[None, None]
61
+ pts = patches.reshape(-1, 2)
62
+ sampled = tensor.permute(1, 2, 0)[tuple(pts.T)[::-1]]
63
+ sampled = sampled.reshape(ps, ps, -1, c)
64
+ assert sampled.shape[:3] == patches.shape[:3]
65
+ return sampled.permute(2, 3, 0, 1)
66
+
67
+
68
+ def simple_nms(scores: torch.Tensor, nms_radius: int):
69
+ """Fast Non-maximum suppression to remove nearby points"""
70
+
71
+ zeros = torch.zeros_like(scores)
72
+ max_mask = scores == torch.nn.functional.max_pool2d(
73
+ scores, kernel_size=nms_radius * 2 + 1, stride=1, padding=nms_radius
74
+ )
75
+
76
+ for _ in range(2):
77
+ supp_mask = (
78
+ torch.nn.functional.max_pool2d(
79
+ max_mask.float(),
80
+ kernel_size=nms_radius * 2 + 1,
81
+ stride=1,
82
+ padding=nms_radius,
83
+ )
84
+ > 0
85
+ )
86
+ supp_scores = torch.where(supp_mask, zeros, scores)
87
+ new_max_mask = supp_scores == torch.nn.functional.max_pool2d(
88
+ supp_scores, kernel_size=nms_radius * 2 + 1, stride=1, padding=nms_radius
89
+ )
90
+ max_mask = max_mask | (new_max_mask & (~supp_mask))
91
+ return torch.where(max_mask, scores, zeros)
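# Illustration only (not part of the file above): simple_nms keeps a score only
# where it is the maximum of its (2 * nms_radius + 1) window; the rest become 0.
example_scores = torch.rand(1, 1, 32, 32)
example_kept = simple_nms(example_scores, nms_radius=2)
assert ((example_kept == 0) | (example_kept == example_scores)).all()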
92
+
93
+
94
+ class DKD(nn.Module):
95
+ def __init__(
96
+ self,
97
+ radius: int = 2,
98
+ top_k: int = 0,
99
+ scores_th: float = 0.2,
100
+ n_limit: int = 20000,
101
+ ):
102
+ """
103
+ Args:
104
+ radius: soft detection radius, kernel size is (2 * radius + 1)
105
+ top_k: top_k > 0: return top k keypoints
106
+ scores_th: top_k <= 0 threshold mode:
107
+ scores_th > 0: return keypoints with scores>scores_th
108
+ else: return keypoints with scores > scores.mean()
109
+ n_limit: max number of keypoint in threshold mode
110
+ """
111
+ super().__init__()
112
+ self.radius = radius
113
+ self.top_k = top_k
114
+ self.scores_th = scores_th
115
+ self.n_limit = n_limit
116
+ self.kernel_size = 2 * self.radius + 1
117
+ self.temperature = 0.1 # tuned temperature
118
+ self.unfold = nn.Unfold(kernel_size=self.kernel_size, padding=self.radius)
119
+ # local xy grid
120
+ x = torch.linspace(-self.radius, self.radius, self.kernel_size)
121
+ # (kernel_size*kernel_size) x 2 : (w,h)
122
+ kw = {"indexing": "ij"} if torch.__version__ >= "1.10" else {}
123
+ self.hw_grid = (
124
+ torch.stack(torch.meshgrid([x, x], **kw)).view(2, -1).t()[:, [1, 0]]
125
+ )
126
+
127
+ def forward(
128
+ self,
129
+ scores_map: torch.Tensor,
130
+ sub_pixel: bool = True,
131
+ image_size: Optional[torch.Tensor] = None,
132
+ ):
133
+ """
134
+ :param scores_map: Bx1xHxW
135
+ :param descriptor_map: BxCxHxW
136
+ :param sub_pixel: whether to use sub-pixel keypoint detection
137
+ :return: kpts: list[Nx2,...]; kptscores: list[N,....] normalised position: -1~1
138
+ """
139
+ b, c, h, w = scores_map.shape
140
+ scores_nograd = scores_map.detach()
141
+ nms_scores = simple_nms(scores_nograd, self.radius)
142
+
143
+ # remove border
144
+ nms_scores[:, :, : self.radius, :] = 0
145
+ nms_scores[:, :, :, : self.radius] = 0
146
+ if image_size is not None:
147
+ for i in range(scores_map.shape[0]):
148
+ w, h = image_size[i].long()
149
+ nms_scores[i, :, h.item() - self.radius :, :] = 0
150
+ nms_scores[i, :, :, w.item() - self.radius :] = 0
151
+ else:
152
+ nms_scores[:, :, -self.radius :, :] = 0
153
+ nms_scores[:, :, :, -self.radius :] = 0
154
+
155
+ # detect keypoints without grad
156
+ if self.top_k > 0:
157
+ topk = torch.topk(nms_scores.view(b, -1), self.top_k)
158
+ indices_keypoints = [topk.indices[i] for i in range(b)] # B x top_k
159
+ else:
160
+ if self.scores_th > 0:
161
+ masks = nms_scores > self.scores_th
162
+ if masks.sum() == 0:
163
+ th = scores_nograd.reshape(b, -1).mean(dim=1) # th = self.scores_th
164
+ masks = nms_scores > th.reshape(b, 1, 1, 1)
165
+ else:
166
+ th = scores_nograd.reshape(b, -1).mean(dim=1) # th = self.scores_th
167
+ masks = nms_scores > th.reshape(b, 1, 1, 1)
168
+ masks = masks.reshape(b, -1)
169
+
170
+ indices_keypoints = [] # list, B x (any size)
171
+ scores_view = scores_nograd.reshape(b, -1)
172
+ for mask, scores in zip(masks, scores_view):
173
+ indices = mask.nonzero()[:, 0]
174
+ if len(indices) > self.n_limit:
175
+ kpts_sc = scores[indices]
176
+ sort_idx = kpts_sc.sort(descending=True)[1]
177
+ sel_idx = sort_idx[: self.n_limit]
178
+ indices = indices[sel_idx]
179
+ indices_keypoints.append(indices)
180
+ wh = torch.tensor([w - 1, h - 1], device=scores_nograd.device)
181
+
182
+ keypoints = []
183
+ scoredispersitys = []
184
+ kptscores = []
185
+ if sub_pixel:
186
+ # detect soft keypoints with grad backpropagation
187
+ patches = self.unfold(scores_map) # B x (kernel**2) x (H*W)
188
+ # print(patches.shape)
189
+ self.hw_grid = self.hw_grid.to(scores_map) # to device
190
+ for b_idx in range(b):
191
+ patch = patches[b_idx].t() # (H*W) x (kernel**2)
192
+ indices_kpt = indices_keypoints[
193
+ b_idx
194
+ ] # one dimension vector, say its size is M
195
+ patch_scores = patch[indices_kpt] # M x (kernel**2)
196
+ keypoints_xy_nms = torch.stack(
197
+ [indices_kpt % w, torch.div(indices_kpt, w, rounding_mode="trunc")],
198
+ dim=1,
199
+ ) # Mx2
200
+
201
+ # max is detached to prevent undesired backprop loops in the graph
202
+ max_v = patch_scores.max(dim=1).values.detach()[:, None]
203
+ x_exp = (
204
+ (patch_scores - max_v) / self.temperature
205
+ ).exp() # M * (kernel**2), in [0, 1]
206
+
207
+ # \frac{ \sum{(i,j) \times \exp(x/T)} }{ \sum{\exp(x/T)} }
208
+ xy_residual = (
209
+ x_exp @ self.hw_grid / x_exp.sum(dim=1)[:, None]
210
+ ) # Soft-argmax, Mx2
211
+
212
+ hw_grid_dist2 = (
213
+ torch.norm(
214
+ (self.hw_grid[None, :, :] - xy_residual[:, None, :])
215
+ / self.radius,
216
+ dim=-1,
217
+ )
218
+ ** 2
219
+ )
220
+ scoredispersity = (x_exp * hw_grid_dist2).sum(dim=1) / x_exp.sum(dim=1)
221
+
222
+ # compute result keypoints
223
+ keypoints_xy = keypoints_xy_nms + xy_residual
224
+ keypoints_xy = keypoints_xy / wh * 2 - 1 # (w,h) -> (-1~1,-1~1)
225
+
226
+ kptscore = torch.nn.functional.grid_sample(
227
+ scores_map[b_idx].unsqueeze(0),
228
+ keypoints_xy.view(1, 1, -1, 2),
229
+ mode="bilinear",
230
+ align_corners=True,
231
+ )[0, 0, 0, :] # CxN
232
+
233
+ keypoints.append(keypoints_xy)
234
+ scoredispersitys.append(scoredispersity)
235
+ kptscores.append(kptscore)
236
+ else:
237
+ for b_idx in range(b):
238
+ indices_kpt = indices_keypoints[
239
+ b_idx
240
+ ] # one dimension vector, say its size is M
241
+ # To avoid warning: UserWarning: __floordiv__ is deprecated
242
+ keypoints_xy_nms = torch.stack(
243
+ [indices_kpt % w, torch.div(indices_kpt, w, rounding_mode="trunc")],
244
+ dim=1,
245
+ ) # Mx2
246
+ keypoints_xy = keypoints_xy_nms / wh * 2 - 1 # (w,h) -> (-1~1,-1~1)
247
+ kptscore = torch.nn.functional.grid_sample(
248
+ scores_map[b_idx].unsqueeze(0),
249
+ keypoints_xy.view(1, 1, -1, 2),
250
+ mode="bilinear",
251
+ align_corners=True,
252
+ )[0, 0, 0, :] # CxN
253
+ keypoints.append(keypoints_xy)
254
+ scoredispersitys.append(kptscore) # for jit.script compatibility
255
+ kptscores.append(kptscore)
256
+
257
+ return keypoints, scoredispersitys, kptscores
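# Illustration only: DKD converts a dense score map into per-image lists of
# sub-pixel keypoints in normalised coordinates, plus their score dispersity
# and sampled scores. Sketch on a random B x 1 x H x W map:
example_dkd = DKD(radius=2, top_k=0, scores_th=0.2, n_limit=500)
example_map = torch.rand(2, 1, 64, 64)
ex_kpts, ex_disp, ex_scores = example_dkd(example_map, sub_pixel=True)
# len(ex_kpts) == 2; ex_kpts[0] is N x 2 (approximately in [-1, 1]); ex_scores[0] is N.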
258
+
259
+
260
+ class InputPadder(object):
261
+ """Pads images such that dimensions are divisible by 8"""
262
+
263
+ def __init__(self, h: int, w: int, divis_by: int = 8):
264
+ self.ht = h
265
+ self.wd = w
266
+ pad_ht = (((self.ht // divis_by) + 1) * divis_by - self.ht) % divis_by
267
+ pad_wd = (((self.wd // divis_by) + 1) * divis_by - self.wd) % divis_by
268
+ self._pad = [
269
+ pad_wd // 2,
270
+ pad_wd - pad_wd // 2,
271
+ pad_ht // 2,
272
+ pad_ht - pad_ht // 2,
273
+ ]
274
+
275
+ def pad(self, x: torch.Tensor):
276
+ assert x.ndim == 4
277
+ return F.pad(x, self._pad, mode="replicate")
278
+
279
+ def unpad(self, x: torch.Tensor):
280
+ assert x.ndim == 4
281
+ ht = x.shape[-2]
282
+ wd = x.shape[-1]
283
+ c = [self._pad[2], ht - self._pad[3], self._pad[0], wd - self._pad[1]]
284
+ return x[..., c[0] : c[1], c[2] : c[3]]
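# Illustration only: InputPadder pads H and W up to the next multiple of
# `divis_by` (replicate padding) and unpad() recovers the original crop exactly.
example_im = torch.rand(1, 3, 250, 333)
example_padder = InputPadder(250, 333, divis_by=32)
example_padded = example_padder.pad(example_im)
assert example_padded.shape[-2] % 32 == 0 and example_padded.shape[-1] % 32 == 0
assert example_padder.unpad(example_padded).shape == example_im.shape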
285
+
286
+
287
+ class DeformableConv2d(nn.Module):
288
+ def __init__(
289
+ self,
290
+ in_channels,
291
+ out_channels,
292
+ kernel_size=3,
293
+ stride=1,
294
+ padding=1,
295
+ bias=False,
296
+ mask=False,
297
+ ):
298
+ super(DeformableConv2d, self).__init__()
299
+
300
+ self.padding = padding
301
+ self.mask = mask
302
+
303
+ self.channel_num = (
304
+ 3 * kernel_size * kernel_size if mask else 2 * kernel_size * kernel_size
305
+ )
306
+ self.offset_conv = nn.Conv2d(
307
+ in_channels,
308
+ self.channel_num,
309
+ kernel_size=kernel_size,
310
+ stride=stride,
311
+ padding=self.padding,
312
+ bias=True,
313
+ )
314
+
315
+ self.regular_conv = nn.Conv2d(
316
+ in_channels=in_channels,
317
+ out_channels=out_channels,
318
+ kernel_size=kernel_size,
319
+ stride=stride,
320
+ padding=self.padding,
321
+ bias=bias,
322
+ )
323
+
324
+ def forward(self, x):
325
+ h, w = x.shape[2:]
326
+ max_offset = max(h, w) / 4.0
327
+
328
+ out = self.offset_conv(x)
329
+ if self.mask:
330
+ o1, o2, mask = torch.chunk(out, 3, dim=1)
331
+ offset = torch.cat((o1, o2), dim=1)
332
+ mask = torch.sigmoid(mask)
333
+ else:
334
+ offset = out
335
+ mask = None
336
+ offset = offset.clamp(-max_offset, max_offset)
337
+ x = torchvision.ops.deform_conv2d(
338
+ input=x,
339
+ offset=offset,
340
+ weight=self.regular_conv.weight,
341
+ bias=self.regular_conv.bias,
342
+ padding=self.padding,
343
+ mask=mask,
344
+ )
345
+ return x
346
+
347
+
348
+ def get_conv(
349
+ inplanes,
350
+ planes,
351
+ kernel_size=3,
352
+ stride=1,
353
+ padding=1,
354
+ bias=False,
355
+ conv_type="conv",
356
+ mask=False,
357
+ ):
358
+ if conv_type == "conv":
359
+ conv = nn.Conv2d(
360
+ inplanes,
361
+ planes,
362
+ kernel_size=kernel_size,
363
+ stride=stride,
364
+ padding=padding,
365
+ bias=bias,
366
+ )
367
+ elif conv_type == "dcn":
368
+ conv = DeformableConv2d(
369
+ inplanes,
370
+ planes,
371
+ kernel_size=kernel_size,
372
+ stride=stride,
373
+ padding=_pair(padding),
374
+ bias=bias,
375
+ mask=mask,
376
+ )
377
+ else:
378
+ raise TypeError
379
+ return conv
380
+
381
+
382
+ class ConvBlock(nn.Module):
383
+ def __init__(
384
+ self,
385
+ in_channels,
386
+ out_channels,
387
+ gate: Optional[Callable[..., nn.Module]] = None,
388
+ norm_layer: Optional[Callable[..., nn.Module]] = None,
389
+ conv_type: str = "conv",
390
+ mask: bool = False,
391
+ ):
392
+ super().__init__()
393
+ if gate is None:
394
+ self.gate = nn.ReLU(inplace=True)
395
+ else:
396
+ self.gate = gate
397
+ if norm_layer is None:
398
+ norm_layer = nn.BatchNorm2d
399
+ self.conv1 = get_conv(
400
+ in_channels, out_channels, kernel_size=3, conv_type=conv_type, mask=mask
401
+ )
402
+ self.bn1 = norm_layer(out_channels)
403
+ self.conv2 = get_conv(
404
+ out_channels, out_channels, kernel_size=3, conv_type=conv_type, mask=mask
405
+ )
406
+ self.bn2 = norm_layer(out_channels)
407
+
408
+ def forward(self, x):
409
+ x = self.gate(self.bn1(self.conv1(x))) # B x in_channels x H x W
410
+ x = self.gate(self.bn2(self.conv2(x))) # B x out_channels x H x W
411
+ return x
412
+
413
+
414
+ # modified based on torchvision\models\resnet.py#27->BasicBlock
415
+ class ResBlock(nn.Module):
416
+ expansion: int = 1
417
+
418
+ def __init__(
419
+ self,
420
+ inplanes: int,
421
+ planes: int,
422
+ stride: int = 1,
423
+ downsample: Optional[nn.Module] = None,
424
+ groups: int = 1,
425
+ base_width: int = 64,
426
+ dilation: int = 1,
427
+ gate: Optional[Callable[..., nn.Module]] = None,
428
+ norm_layer: Optional[Callable[..., nn.Module]] = None,
429
+ conv_type: str = "conv",
430
+ mask: bool = False,
431
+ ) -> None:
432
+ super(ResBlock, self).__init__()
433
+ if gate is None:
434
+ self.gate = nn.ReLU(inplace=True)
435
+ else:
436
+ self.gate = gate
437
+ if norm_layer is None:
438
+ norm_layer = nn.BatchNorm2d
439
+ if groups != 1 or base_width != 64:
440
+ raise ValueError("ResBlock only supports groups=1 and base_width=64")
441
+ if dilation > 1:
442
+ raise NotImplementedError("Dilation > 1 not supported in ResBlock")
443
+ # Both self.conv1 and self.downsample layers
444
+ # downsample the input when stride != 1
445
+ self.conv1 = get_conv(
446
+ inplanes, planes, kernel_size=3, conv_type=conv_type, mask=mask
447
+ )
448
+ self.bn1 = norm_layer(planes)
449
+ self.conv2 = get_conv(
450
+ planes, planes, kernel_size=3, conv_type=conv_type, mask=mask
451
+ )
452
+ self.bn2 = norm_layer(planes)
453
+ self.downsample = downsample
454
+ self.stride = stride
455
+
456
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
457
+ identity = x
458
+
459
+ out = self.conv1(x)
460
+ out = self.bn1(out)
461
+ out = self.gate(out)
462
+
463
+ out = self.conv2(out)
464
+ out = self.bn2(out)
465
+
466
+ if self.downsample is not None:
467
+ identity = self.downsample(x)
468
+
469
+ out += identity
470
+ out = self.gate(out)
471
+
472
+ return out
473
+
474
+
475
+ class SDDH(nn.Module):
476
+ def __init__(
477
+ self,
478
+ dims: int,
479
+ kernel_size: int = 3,
480
+ n_pos: int = 8,
481
+ gate=nn.ReLU(),
482
+ conv2D=False,
483
+ mask=False,
484
+ ):
485
+ super(SDDH, self).__init__()
486
+ self.kernel_size = kernel_size
487
+ self.n_pos = n_pos
488
+ self.conv2D = conv2D
489
+ self.mask = mask
490
+
491
+ self.get_patches_func = get_patches
492
+
493
+ # estimate offsets
494
+ self.channel_num = 3 * n_pos if mask else 2 * n_pos
495
+ self.offset_conv = nn.Sequential(
496
+ nn.Conv2d(
497
+ dims,
498
+ self.channel_num,
499
+ kernel_size=kernel_size,
500
+ stride=1,
501
+ padding=0,
502
+ bias=True,
503
+ ),
504
+ gate,
505
+ nn.Conv2d(
506
+ self.channel_num,
507
+ self.channel_num,
508
+ kernel_size=1,
509
+ stride=1,
510
+ padding=0,
511
+ bias=True,
512
+ ),
513
+ )
514
+
515
+ # sampled feature conv
516
+ self.sf_conv = nn.Conv2d(
517
+ dims, dims, kernel_size=1, stride=1, padding=0, bias=False
518
+ )
519
+
520
+ # convM
521
+ if not conv2D:
522
+ # deformable desc weights
523
+ agg_weights = torch.nn.Parameter(torch.rand(n_pos, dims, dims))
524
+ self.register_parameter("agg_weights", agg_weights)
525
+ else:
526
+ self.convM = nn.Conv2d(
527
+ dims * n_pos, dims, kernel_size=1, stride=1, padding=0, bias=False
528
+ )
529
+
530
+ def forward(self, x, keypoints):
531
+ # x: [B,C,H,W]
532
+ # keypoints: list, [[N_kpts,2], ...] (w,h)
533
+ b, c, h, w = x.shape
534
+ wh = torch.tensor([[w - 1, h - 1]], device=x.device)
535
+ max_offset = max(h, w) / 4.0
536
+
537
+ offsets = []
538
+ descriptors = []
539
+ # get offsets for each keypoint
540
+ for ib in range(b):
541
+ xi, kptsi = x[ib], keypoints[ib]
542
+ kptsi_wh = (kptsi / 2 + 0.5) * wh
543
+ N_kpts = len(kptsi)
544
+
545
+ if self.kernel_size > 1:
546
+ patch = self.get_patches_func(
547
+ xi, kptsi_wh.long(), self.kernel_size
548
+ ) # [N_kpts, C, K, K]
549
+ else:
550
+ kptsi_wh_long = kptsi_wh.long()
551
+ patch = (
552
+ xi[:, kptsi_wh_long[:, 1], kptsi_wh_long[:, 0]]
553
+ .permute(1, 0)
554
+ .reshape(N_kpts, c, 1, 1)
555
+ )
556
+
557
+ offset = self.offset_conv(patch).clamp(
558
+ -max_offset, max_offset
559
+ ) # [N_kpts, 2*n_pos, 1, 1]
560
+ if self.mask:
561
+ offset = (
562
+ offset[:, :, 0, 0].view(N_kpts, 3, self.n_pos).permute(0, 2, 1)
563
+ ) # [N_kpts, n_pos, 3]
564
+ offset = offset[:, :, :-1] # [N_kpts, n_pos, 2]
565
+ mask_weight = torch.sigmoid(offset[:, :, -1]) # [N_kpts, n_pos]
566
+ else:
567
+ offset = (
568
+ offset[:, :, 0, 0].view(N_kpts, 2, self.n_pos).permute(0, 2, 1)
569
+ ) # [N_kpts, n_pos, 2]
570
+ offsets.append(offset) # for visualization
571
+
572
+ # get sample positions
573
+ pos = kptsi_wh.unsqueeze(1) + offset # [N_kpts, n_pos, 2]
574
+ pos = 2.0 * pos / wh[None] - 1
575
+ pos = pos.reshape(1, N_kpts * self.n_pos, 1, 2)
576
+
577
+ # sample features
578
+ features = F.grid_sample(
579
+ xi.unsqueeze(0), pos, mode="bilinear", align_corners=True
580
+ ) # [1,C,(N_kpts*n_pos),1]
581
+ features = features.reshape(c, N_kpts, self.n_pos, 1).permute(
582
+ 1, 0, 2, 3
583
+ ) # [N_kpts, C, n_pos, 1]
584
+ if self.mask:
585
+ features = torch.einsum("ncpo,np->ncpo", features, mask_weight)
586
+
587
+ features = torch.selu_(self.sf_conv(features)).squeeze(
588
+ -1
589
+ ) # [N_kpts, C, n_pos]
590
+ # convM
591
+ if not self.conv2D:
592
+ descs = torch.einsum(
593
+ "ncp,pcd->nd", features, self.agg_weights
594
+ ) # [N_kpts, C]
595
+ else:
596
+ features = features.reshape(N_kpts, -1)[
597
+ :, :, None, None
598
+ ] # [N_kpts, C*n_pos, 1, 1]
599
+ descs = self.convM(features).squeeze() # [N_kpts, C]
600
+
601
+ # normalize
602
+ descs = F.normalize(descs, p=2.0, dim=1)
603
+ descriptors.append(descs)
604
+
605
+ return descriptors, offsets
606
+
607
+
608
+ class ALIKED(Extractor):
609
+ default_conf = {
610
+ "model_name": "aliked-n16",
611
+ "max_num_keypoints": -1,
612
+ "detection_threshold": 0.2,
613
+ "nms_radius": 2,
614
+ }
615
+
616
+ checkpoint_url = "https://github.com/Shiaoming/ALIKED/raw/main/models/{}.pth"
617
+
618
+ n_limit_max = 20000
619
+
620
+ # c1, c2, c3, c4, dim, K, M
621
+ cfgs = {
622
+ "aliked-t16": [8, 16, 32, 64, 64, 3, 16],
623
+ "aliked-n16": [16, 32, 64, 128, 128, 3, 16],
624
+ "aliked-n16rot": [16, 32, 64, 128, 128, 3, 16],
625
+ "aliked-n32": [16, 32, 64, 128, 128, 3, 32],
626
+ }
627
+ preprocess_conf = {
628
+ "resize": 1024,
629
+ }
630
+
631
+ required_data_keys = ["image"]
632
+
633
+ def __init__(self, **conf):
634
+ super().__init__(**conf) # Update with default configuration.
635
+ conf = self.conf
636
+ c1, c2, c3, c4, dim, K, M = self.cfgs[conf.model_name]
637
+ conv_types = ["conv", "conv", "dcn", "dcn"]
638
+ conv2D = False
639
+ mask = False
640
+
641
+ # build model
642
+ self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2)
643
+ self.pool4 = nn.AvgPool2d(kernel_size=4, stride=4)
644
+ self.norm = nn.BatchNorm2d
645
+ self.gate = nn.SELU(inplace=True)
646
+ self.block1 = ConvBlock(3, c1, self.gate, self.norm, conv_type=conv_types[0])
647
+ self.block2 = self.get_resblock(c1, c2, conv_types[1], mask)
648
+ self.block3 = self.get_resblock(c2, c3, conv_types[2], mask)
649
+ self.block4 = self.get_resblock(c3, c4, conv_types[3], mask)
650
+
651
+ self.conv1 = resnet.conv1x1(c1, dim // 4)
652
+ self.conv2 = resnet.conv1x1(c2, dim // 4)
653
+ self.conv3 = resnet.conv1x1(c3, dim // 4)
654
+ self.conv4 = resnet.conv1x1(dim, dim // 4)
655
+ self.upsample2 = nn.Upsample(
656
+ scale_factor=2, mode="bilinear", align_corners=True
657
+ )
658
+ self.upsample4 = nn.Upsample(
659
+ scale_factor=4, mode="bilinear", align_corners=True
660
+ )
661
+ self.upsample8 = nn.Upsample(
662
+ scale_factor=8, mode="bilinear", align_corners=True
663
+ )
664
+ self.upsample32 = nn.Upsample(
665
+ scale_factor=32, mode="bilinear", align_corners=True
666
+ )
667
+ self.score_head = nn.Sequential(
668
+ resnet.conv1x1(dim, 8),
669
+ self.gate,
670
+ resnet.conv3x3(8, 4),
671
+ self.gate,
672
+ resnet.conv3x3(4, 4),
673
+ self.gate,
674
+ resnet.conv3x3(4, 1),
675
+ )
676
+ self.desc_head = SDDH(dim, K, M, gate=self.gate, conv2D=conv2D, mask=mask)
677
+ self.dkd = DKD(
678
+ radius=conf.nms_radius,
679
+ top_k=-1 if conf.detection_threshold > 0 else conf.max_num_keypoints,
680
+ scores_th=conf.detection_threshold,
681
+ n_limit=conf.max_num_keypoints
682
+ if conf.max_num_keypoints > 0
683
+ else self.n_limit_max,
684
+ )
685
+
686
+ state_dict = torch.hub.load_state_dict_from_url(
687
+ self.checkpoint_url.format(conf.model_name), map_location="cpu"
688
+ )
689
+ self.load_state_dict(state_dict, strict=True)
690
+
691
+ def get_resblock(self, c_in, c_out, conv_type, mask):
692
+ return ResBlock(
693
+ c_in,
694
+ c_out,
695
+ 1,
696
+ nn.Conv2d(c_in, c_out, 1),
697
+ gate=self.gate,
698
+ norm_layer=self.norm,
699
+ conv_type=conv_type,
700
+ mask=mask,
701
+ )
702
+
703
+ def extract_dense_map(self, image):
704
+ # Pad images so that their dimensions are divisible by div_by (2**5 = 32)
705
+ div_by = 2**5
706
+ padder = InputPadder(image.shape[-2], image.shape[-1], div_by)
707
+ image = padder.pad(image)
708
+
709
+ # ================================== feature encoder
710
+ x1 = self.block1(image) # B x c1 x H x W
711
+ x2 = self.pool2(x1)
712
+ x2 = self.block2(x2) # B x c2 x H/2 x W/2
713
+ x3 = self.pool4(x2)
714
+ x3 = self.block3(x3) # B x c3 x H/8 x W/8
715
+ x4 = self.pool4(x3)
716
+ x4 = self.block4(x4) # B x dim x H/32 x W/32
717
+ # ================================== feature aggregation
718
+ x1 = self.gate(self.conv1(x1)) # B x dim//4 x H x W
719
+ x2 = self.gate(self.conv2(x2)) # B x dim//4 x H//2 x W//2
720
+ x3 = self.gate(self.conv3(x3)) # B x dim//4 x H//8 x W//8
721
+ x4 = self.gate(self.conv4(x4)) # B x dim//4 x H//32 x W//32
722
+ x2_up = self.upsample2(x2) # B x dim//4 x H x W
723
+ x3_up = self.upsample8(x3) # B x dim//4 x H x W
724
+ x4_up = self.upsample32(x4) # B x dim//4 x H x W
725
+ x1234 = torch.cat([x1, x2_up, x3_up, x4_up], dim=1)
726
+ # ================================== score head
727
+ score_map = torch.sigmoid(self.score_head(x1234))
728
+ feature_map = torch.nn.functional.normalize(x1234, p=2, dim=1)
729
+
730
+ # Unpads images
731
+ feature_map = padder.unpad(feature_map)
732
+ score_map = padder.unpad(score_map)
733
+
734
+ return feature_map, score_map
735
+
736
+ def forward(self, data: dict) -> dict:
737
+ # need to set here unfortunately
738
+ self.dkd.n_limit = (
739
+ self.conf.max_num_keypoints
740
+ if self.conf.max_num_keypoints > 0
741
+ else self.n_limit_max
742
+ )
743
+ image = data["image"]
744
+ if image.shape[1] == 1:
745
+ image = grayscale_to_rgb(image)
746
+ feature_map, score_map = self.extract_dense_map(image)
747
+ keypoints, kptscores, scoredispersitys = self.dkd(
748
+ score_map, image_size=data.get("image_size")
749
+ )
750
+ # descriptors, offsets = self.desc_head(feature_map, keypoints)
751
+
752
+ _, _, h, w = image.shape
753
+ wh = torch.tensor([w - 1, h - 1], device=image.device)
754
+ # no padding required
755
+ # we can set detection_threshold=-1 and conf.max_num_keypoints > 0
756
+ return {
757
+ "keypoints": wh * (torch.stack(keypoints) + 1) / 2.0, # B x N x 2
758
+ # "descriptors": torch.stack(descriptors), # B x N x D
759
+ "keypoint_scores": torch.stack(kptscores), # B x N
760
+ "scoremap": score_map, # B x 1 x H x W
761
+ }
762
+
763
+
764
+ class ALIKEDROT(ALIKED):
765
+ default_conf = {
766
+ "model_name": "aliked-n16rot",
767
+ "max_num_keypoints": -1,
768
+ "detection_threshold": 0.2,
769
+ "nms_radius": 2,
770
+ }
imcui/third_party/dad/dad/detectors/third_party/lightglue/disk.py ADDED
@@ -0,0 +1,48 @@
1
+ import kornia
2
+ import torch
3
+
4
+ from .utils import Extractor
5
+
6
+
7
+ class DISK(Extractor):
8
+ default_conf = {
9
+ "weights": "depth",
10
+ "max_num_keypoints": None,
11
+ "desc_dim": 128,
12
+ "nms_window_size": 5,
13
+ "detection_threshold": 0.0,
14
+ "pad_if_not_divisible": True,
15
+ }
16
+
17
+ preprocess_conf = {
18
+ "resize": 1024,
19
+ "grayscale": False,
20
+ }
21
+
22
+ required_data_keys = ["image"]
23
+
24
+ def __init__(self, **conf) -> None:
25
+ super().__init__(**conf) # Update with default configuration.
26
+ self.model = kornia.feature.DISK.from_pretrained(self.conf.weights)
27
+
28
+ def forward(self, data: dict) -> dict:
29
+ """Compute keypoints, scores, descriptors for image"""
30
+ for key in self.required_data_keys:
31
+ assert key in data, f"Missing key {key} in data"
32
+ image = data["image"]
33
+ if image.shape[1] == 1:
34
+ image = kornia.color.grayscale_to_rgb(image)
35
+ features = self.model(
36
+ image,
37
+ n=self.conf.max_num_keypoints,
38
+ window_size=self.conf.nms_window_size,
39
+ score_threshold=self.conf.detection_threshold,
40
+ pad_if_not_divisible=self.conf.pad_if_not_divisible,
41
+ )
42
+ keypoints = [f.keypoints for f in features]
43
+
44
+ keypoints = torch.stack(keypoints, 0)
45
+
46
+ return {
47
+ "keypoints": keypoints.to(image).contiguous(),
48
+ }
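Like ALIKED above, this DISK wrapper is stripped down to a pure detector: it returns only the keypoints from kornia's pretrained DISK. A short sketch, assuming kornia is installed and can download the `depth` weights:

import torch
from dad.detectors.third_party.lightglue import DISK

disk = DISK(max_num_keypoints=2048).eval()
with torch.inference_mode():
    out = disk({"image": torch.rand(1, 3, 480, 640)})
# out["keypoints"]: B x N x 2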
imcui/third_party/dad/dad/detectors/third_party/lightglue/dog_hardnet.py ADDED
@@ -0,0 +1,41 @@
1
+ import torch
2
+ from kornia.color import rgb_to_grayscale
3
+ from kornia.feature import HardNet, LAFDescriptor, laf_from_center_scale_ori
4
+
5
+ from .sift import SIFT
6
+
7
+
8
+ class DoGHardNet(SIFT):
9
+ required_data_keys = ["image"]
10
+
11
+ def __init__(self, **conf):
12
+ super().__init__(**conf)
13
+ self.laf_desc = LAFDescriptor(HardNet(True)).eval()
14
+
15
+ def forward(self, data: dict) -> dict:
16
+ image = data["image"]
17
+ if image.shape[1] == 3:
18
+ image = rgb_to_grayscale(image)
19
+ device = image.device
20
+ self.laf_desc = self.laf_desc.to(device)
21
+ self.laf_desc.descriptor = self.laf_desc.descriptor.eval()
22
+ pred = []
23
+ if "image_size" in data.keys():
24
+ im_size = data.get("image_size").long()
25
+ else:
26
+ im_size = None
27
+ for k in range(len(image)):
28
+ img = image[k]
29
+ if im_size is not None:
30
+ w, h = data["image_size"][k]
31
+ img = img[:, : h.to(torch.int32), : w.to(torch.int32)]
32
+ p = self.extract_single_image(img)
33
+ lafs = laf_from_center_scale_ori(
34
+ p["keypoints"].reshape(1, -1, 2),
35
+ 6.0 * p["scales"].reshape(1, -1, 1, 1),
36
+ torch.rad2deg(p["oris"]).reshape(1, -1, 1),
37
+ ).to(device)
38
+ p["descriptors"] = self.laf_desc(img[None], lafs).reshape(-1, 128)
39
+ pred.append(p)
40
+ pred = {k: torch.stack([p[k] for p in pred], 0).to(device) for k in pred[0]}
41
+ return pred
imcui/third_party/dad/dad/detectors/third_party/lightglue/lightglue.py ADDED
@@ -0,0 +1,655 @@
1
+ import warnings
2
+ from pathlib import Path
3
+ from types import SimpleNamespace
4
+ from typing import Callable, List, Optional, Tuple
5
+
6
+ import numpy as np
7
+ import torch
8
+ import torch.nn.functional as F
9
+ from torch import nn
10
+
11
+ try:
12
+ from flash_attn.modules.mha import FlashCrossAttention
13
+ except ModuleNotFoundError:
14
+ FlashCrossAttention = None
15
+
16
+ if FlashCrossAttention or hasattr(F, "scaled_dot_product_attention"):
17
+ FLASH_AVAILABLE = True
18
+ else:
19
+ FLASH_AVAILABLE = False
20
+
21
+ torch.backends.cudnn.deterministic = True
22
+
23
+
24
+ @torch.amp.custom_fwd(device_type="cuda", cast_inputs=torch.float32)
25
+ def normalize_keypoints(
26
+ kpts: torch.Tensor, size: Optional[torch.Tensor] = None
27
+ ) -> torch.Tensor:
28
+ if size is None:
29
+ size = 1 + kpts.max(-2).values - kpts.min(-2).values
30
+ elif not isinstance(size, torch.Tensor):
31
+ size = torch.tensor(size, device=kpts.device, dtype=kpts.dtype)
32
+ size = size.to(kpts)
33
+ shift = size / 2
34
+ scale = size.max(-1).values / 2
35
+ kpts = (kpts - shift[..., None, :]) / scale[..., None, None]
36
+ return kpts
37
+
38
+
39
+ def pad_to_length(x: torch.Tensor, length: int) -> Tuple[torch.Tensor]:
40
+ if length <= x.shape[-2]:
41
+ return x, torch.ones_like(x[..., :1], dtype=torch.bool)
42
+ pad = torch.ones(
43
+ *x.shape[:-2], length - x.shape[-2], x.shape[-1], device=x.device, dtype=x.dtype
44
+ )
45
+ y = torch.cat([x, pad], dim=-2)
46
+ mask = torch.zeros(*y.shape[:-1], 1, dtype=torch.bool, device=x.device)
47
+ mask[..., : x.shape[-2], :] = True
48
+ return y, mask
49
+
50
+
51
+ def rotate_half(x: torch.Tensor) -> torch.Tensor:
52
+ x = x.unflatten(-1, (-1, 2))
53
+ x1, x2 = x.unbind(dim=-1)
54
+ return torch.stack((-x2, x1), dim=-1).flatten(start_dim=-2)
55
+
56
+
57
+ def apply_cached_rotary_emb(freqs: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
58
+ return (t * freqs[0]) + (rotate_half(t) * freqs[1])
59
+
60
+
61
+ class LearnableFourierPositionalEncoding(nn.Module):
62
+ def __init__(self, M: int, dim: int, F_dim: int = None, gamma: float = 1.0) -> None:
63
+ super().__init__()
64
+ F_dim = F_dim if F_dim is not None else dim
65
+ self.gamma = gamma
66
+ self.Wr = nn.Linear(M, F_dim // 2, bias=False)
67
+ nn.init.normal_(self.Wr.weight.data, mean=0, std=self.gamma**-2)
68
+
69
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
70
+ """encode position vector"""
71
+ projected = self.Wr(x)
72
+ cosines, sines = torch.cos(projected), torch.sin(projected)
73
+ emb = torch.stack([cosines, sines], 0).unsqueeze(-3)
74
+ return emb.repeat_interleave(2, dim=-1)
75
+
76
+
77
+ class TokenConfidence(nn.Module):
78
+ def __init__(self, dim: int) -> None:
79
+ super().__init__()
80
+ self.token = nn.Sequential(nn.Linear(dim, 1), nn.Sigmoid())
81
+
82
+ def forward(self, desc0: torch.Tensor, desc1: torch.Tensor):
83
+ """get confidence tokens"""
84
+ return (
85
+ self.token(desc0.detach()).squeeze(-1),
86
+ self.token(desc1.detach()).squeeze(-1),
87
+ )
88
+
89
+
90
+ class Attention(nn.Module):
91
+ def __init__(self, allow_flash: bool) -> None:
92
+ super().__init__()
93
+ if allow_flash and not FLASH_AVAILABLE:
94
+ warnings.warn(
95
+ "FlashAttention is not available. For optimal speed, "
96
+ "consider installing torch >= 2.0 or flash-attn.",
97
+ stacklevel=2,
98
+ )
99
+ self.enable_flash = allow_flash and FLASH_AVAILABLE
100
+ self.has_sdp = hasattr(F, "scaled_dot_product_attention")
101
+ if allow_flash and FlashCrossAttention:
102
+ self.flash_ = FlashCrossAttention()
103
+ if self.has_sdp:
104
+ torch.backends.cuda.enable_flash_sdp(allow_flash)
105
+
106
+ def forward(self, q, k, v, mask: Optional[torch.Tensor] = None) -> torch.Tensor:
107
+ if q.shape[-2] == 0 or k.shape[-2] == 0:
108
+ return q.new_zeros((*q.shape[:-1], v.shape[-1]))
109
+ if self.enable_flash and q.device.type == "cuda":
110
+ # use torch 2.0 scaled_dot_product_attention with flash
111
+ if self.has_sdp:
112
+ args = [x.half().contiguous() for x in [q, k, v]]
113
+ v = F.scaled_dot_product_attention(*args, attn_mask=mask).to(q.dtype)
114
+ return v if mask is None else v.nan_to_num()
115
+ else:
116
+ assert mask is None
117
+ q, k, v = [x.transpose(-2, -3).contiguous() for x in [q, k, v]]
118
+ m = self.flash_(q.half(), torch.stack([k, v], 2).half())
119
+ return m.transpose(-2, -3).to(q.dtype).clone()
120
+ elif self.has_sdp:
121
+ args = [x.contiguous() for x in [q, k, v]]
122
+ v = F.scaled_dot_product_attention(*args, attn_mask=mask)
123
+ return v if mask is None else v.nan_to_num()
124
+ else:
125
+ s = q.shape[-1] ** -0.5
126
+ sim = torch.einsum("...id,...jd->...ij", q, k) * s
127
+ if mask is not None:
128
+ sim.masked_fill(~mask, -float("inf"))
129
+ attn = F.softmax(sim, -1)
130
+ return torch.einsum("...ij,...jd->...id", attn, v)
131
+
132
+
133
+ class SelfBlock(nn.Module):
134
+ def __init__(
135
+ self, embed_dim: int, num_heads: int, flash: bool = False, bias: bool = True
136
+ ) -> None:
137
+ super().__init__()
138
+ self.embed_dim = embed_dim
139
+ self.num_heads = num_heads
140
+ assert self.embed_dim % num_heads == 0
141
+ self.head_dim = self.embed_dim // num_heads
142
+ self.Wqkv = nn.Linear(embed_dim, 3 * embed_dim, bias=bias)
143
+ self.inner_attn = Attention(flash)
144
+ self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
145
+ self.ffn = nn.Sequential(
146
+ nn.Linear(2 * embed_dim, 2 * embed_dim),
147
+ nn.LayerNorm(2 * embed_dim, elementwise_affine=True),
148
+ nn.GELU(),
149
+ nn.Linear(2 * embed_dim, embed_dim),
150
+ )
151
+
152
+ def forward(
153
+ self,
154
+ x: torch.Tensor,
155
+ encoding: torch.Tensor,
156
+ mask: Optional[torch.Tensor] = None,
157
+ ) -> torch.Tensor:
158
+ qkv = self.Wqkv(x)
159
+ qkv = qkv.unflatten(-1, (self.num_heads, -1, 3)).transpose(1, 2)
160
+ q, k, v = qkv[..., 0], qkv[..., 1], qkv[..., 2]
161
+ q = apply_cached_rotary_emb(encoding, q)
162
+ k = apply_cached_rotary_emb(encoding, k)
163
+ context = self.inner_attn(q, k, v, mask=mask)
164
+ message = self.out_proj(context.transpose(1, 2).flatten(start_dim=-2))
165
+ return x + self.ffn(torch.cat([x, message], -1))
166
+
167
+
168
+ class CrossBlock(nn.Module):
169
+ def __init__(
170
+ self, embed_dim: int, num_heads: int, flash: bool = False, bias: bool = True
171
+ ) -> None:
172
+ super().__init__()
173
+ self.heads = num_heads
174
+ dim_head = embed_dim // num_heads
175
+ self.scale = dim_head**-0.5
176
+ inner_dim = dim_head * num_heads
177
+ self.to_qk = nn.Linear(embed_dim, inner_dim, bias=bias)
178
+ self.to_v = nn.Linear(embed_dim, inner_dim, bias=bias)
179
+ self.to_out = nn.Linear(inner_dim, embed_dim, bias=bias)
180
+ self.ffn = nn.Sequential(
181
+ nn.Linear(2 * embed_dim, 2 * embed_dim),
182
+ nn.LayerNorm(2 * embed_dim, elementwise_affine=True),
183
+ nn.GELU(),
184
+ nn.Linear(2 * embed_dim, embed_dim),
185
+ )
186
+ if flash and FLASH_AVAILABLE:
187
+ self.flash = Attention(True)
188
+ else:
189
+ self.flash = None
190
+
191
+ def map_(self, func: Callable, x0: torch.Tensor, x1: torch.Tensor):
192
+ return func(x0), func(x1)
193
+
194
+ def forward(
195
+ self, x0: torch.Tensor, x1: torch.Tensor, mask: Optional[torch.Tensor] = None
196
+ ) -> List[torch.Tensor]:
197
+ qk0, qk1 = self.map_(self.to_qk, x0, x1)
198
+ v0, v1 = self.map_(self.to_v, x0, x1)
199
+ qk0, qk1, v0, v1 = map(
200
+ lambda t: t.unflatten(-1, (self.heads, -1)).transpose(1, 2),
201
+ (qk0, qk1, v0, v1),
202
+ )
203
+ if self.flash is not None and qk0.device.type == "cuda":
204
+ m0 = self.flash(qk0, qk1, v1, mask)
205
+ m1 = self.flash(
206
+ qk1, qk0, v0, mask.transpose(-1, -2) if mask is not None else None
207
+ )
208
+ else:
209
+ qk0, qk1 = qk0 * self.scale**0.5, qk1 * self.scale**0.5
210
+ sim = torch.einsum("bhid, bhjd -> bhij", qk0, qk1)
211
+ if mask is not None:
212
+ sim = sim.masked_fill(~mask, -float("inf"))
213
+ attn01 = F.softmax(sim, dim=-1)
214
+ attn10 = F.softmax(sim.transpose(-2, -1).contiguous(), dim=-1)
215
+ m0 = torch.einsum("bhij, bhjd -> bhid", attn01, v1)
216
+ m1 = torch.einsum("bhji, bhjd -> bhid", attn10.transpose(-2, -1), v0)
217
+ if mask is not None:
218
+ m0, m1 = m0.nan_to_num(), m1.nan_to_num()
219
+ m0, m1 = self.map_(lambda t: t.transpose(1, 2).flatten(start_dim=-2), m0, m1)
220
+ m0, m1 = self.map_(self.to_out, m0, m1)
221
+ x0 = x0 + self.ffn(torch.cat([x0, m0], -1))
222
+ x1 = x1 + self.ffn(torch.cat([x1, m1], -1))
223
+ return x0, x1
224
+
225
+
226
+ class TransformerLayer(nn.Module):
227
+ def __init__(self, *args, **kwargs):
228
+ super().__init__()
229
+ self.self_attn = SelfBlock(*args, **kwargs)
230
+ self.cross_attn = CrossBlock(*args, **kwargs)
231
+
232
+ def forward(
233
+ self,
234
+ desc0,
235
+ desc1,
236
+ encoding0,
237
+ encoding1,
238
+ mask0: Optional[torch.Tensor] = None,
239
+ mask1: Optional[torch.Tensor] = None,
240
+ ):
241
+ if mask0 is not None and mask1 is not None:
242
+ return self.masked_forward(desc0, desc1, encoding0, encoding1, mask0, mask1)
243
+ else:
244
+ desc0 = self.self_attn(desc0, encoding0)
245
+ desc1 = self.self_attn(desc1, encoding1)
246
+ return self.cross_attn(desc0, desc1)
247
+
248
+ # This part is compiled and allows padding inputs
249
+ def masked_forward(self, desc0, desc1, encoding0, encoding1, mask0, mask1):
250
+ mask = mask0 & mask1.transpose(-1, -2)
251
+ mask0 = mask0 & mask0.transpose(-1, -2)
252
+ mask1 = mask1 & mask1.transpose(-1, -2)
253
+ desc0 = self.self_attn(desc0, encoding0, mask0)
254
+ desc1 = self.self_attn(desc1, encoding1, mask1)
255
+ return self.cross_attn(desc0, desc1, mask)
256
+
257
+
258
+ def sigmoid_log_double_softmax(
259
+ sim: torch.Tensor, z0: torch.Tensor, z1: torch.Tensor
260
+ ) -> torch.Tensor:
261
+ """create the log assignment matrix from logits and similarity"""
262
+ b, m, n = sim.shape
263
+ certainties = F.logsigmoid(z0) + F.logsigmoid(z1).transpose(1, 2)
264
+ scores0 = F.log_softmax(sim, 2)
265
+ scores1 = F.log_softmax(sim.transpose(-1, -2).contiguous(), 2).transpose(-1, -2)
266
+ scores = sim.new_full((b, m + 1, n + 1), 0)
267
+ scores[:, :m, :n] = scores0 + scores1 + certainties
268
+ scores[:, :-1, -1] = F.logsigmoid(-z0.squeeze(-1))
269
+ scores[:, -1, :-1] = F.logsigmoid(-z1.squeeze(-1))
270
+ return scores
271
+
272
+
273
+ class MatchAssignment(nn.Module):
274
+ def __init__(self, dim: int) -> None:
275
+ super().__init__()
276
+ self.dim = dim
277
+ self.matchability = nn.Linear(dim, 1, bias=True)
278
+ self.final_proj = nn.Linear(dim, dim, bias=True)
279
+
280
+ def forward(self, desc0: torch.Tensor, desc1: torch.Tensor):
281
+ """build assignment matrix from descriptors"""
282
+ mdesc0, mdesc1 = self.final_proj(desc0), self.final_proj(desc1)
283
+ _, _, d = mdesc0.shape
284
+ mdesc0, mdesc1 = mdesc0 / d**0.25, mdesc1 / d**0.25
285
+ sim = torch.einsum("bmd,bnd->bmn", mdesc0, mdesc1)
286
+ z0 = self.matchability(desc0)
287
+ z1 = self.matchability(desc1)
288
+ scores = sigmoid_log_double_softmax(sim, z0, z1)
289
+ return scores, sim
290
+
291
+ def get_matchability(self, desc: torch.Tensor):
292
+ return torch.sigmoid(self.matchability(desc)).squeeze(-1)
293
+
294
+
295
+ def filter_matches(scores: torch.Tensor, th: float):
296
+ """obtain matches from a log assignment matrix [Bx M+1 x N+1]"""
297
+ max0, max1 = scores[:, :-1, :-1].max(2), scores[:, :-1, :-1].max(1)
298
+ m0, m1 = max0.indices, max1.indices
299
+ indices0 = torch.arange(m0.shape[1], device=m0.device)[None]
300
+ indices1 = torch.arange(m1.shape[1], device=m1.device)[None]
301
+ mutual0 = indices0 == m1.gather(1, m0)
302
+ mutual1 = indices1 == m0.gather(1, m1)
303
+ max0_exp = max0.values.exp()
304
+ zero = max0_exp.new_tensor(0)
305
+ mscores0 = torch.where(mutual0, max0_exp, zero)
306
+ mscores1 = torch.where(mutual1, mscores0.gather(1, m1), zero)
307
+ valid0 = mutual0 & (mscores0 > th)
308
+ valid1 = mutual1 & valid0.gather(1, m1)
309
+ m0 = torch.where(valid0, m0, -1)
310
+ m1 = torch.where(valid1, m1, -1)
311
+ return m0, m1, mscores0, mscores1
312
+
313
+
314
+ class LightGlue(nn.Module):
315
+ default_conf = {
316
+ "name": "lightglue", # just for interfacing
317
+ "input_dim": 256, # input descriptor dimension (autoselected from weights)
318
+ "descriptor_dim": 256,
319
+ "add_scale_ori": False,
320
+ "n_layers": 9,
321
+ "num_heads": 4,
322
+ "flash": True, # enable FlashAttention if available.
323
+ "mp": False, # enable mixed precision
324
+ "depth_confidence": 0.95, # early stopping, disable with -1
325
+ "width_confidence": 0.99, # point pruning, disable with -1
326
+ "filter_threshold": 0.1, # match threshold
327
+ "weights": None,
328
+ }
329
+
330
+ # Point pruning involves an overhead (gather).
331
+ # Therefore, we only activate it if there are enough keypoints.
332
+ pruning_keypoint_thresholds = {
333
+ "cpu": -1,
334
+ "mps": -1,
335
+ "cuda": 1024,
336
+ "flash": 1536,
337
+ }
338
+
339
+ required_data_keys = ["image0", "image1"]
340
+
341
+ version = "v0.1_arxiv"
342
+ url = "https://github.com/cvg/LightGlue/releases/download/{}/{}_lightglue.pth"
343
+
344
+ features = {
345
+ "superpoint": {
346
+ "weights": "superpoint_lightglue",
347
+ "input_dim": 256,
348
+ },
349
+ "disk": {
350
+ "weights": "disk_lightglue",
351
+ "input_dim": 128,
352
+ },
353
+ "aliked": {
354
+ "weights": "aliked_lightglue",
355
+ "input_dim": 128,
356
+ },
357
+ "sift": {
358
+ "weights": "sift_lightglue",
359
+ "input_dim": 128,
360
+ "add_scale_ori": True,
361
+ },
362
+ "doghardnet": {
363
+ "weights": "doghardnet_lightglue",
364
+ "input_dim": 128,
365
+ "add_scale_ori": True,
366
+ },
367
+ }
368
+
369
+ def __init__(self, features="superpoint", **conf) -> None:
370
+ super().__init__()
371
+ self.conf = conf = SimpleNamespace(**{**self.default_conf, **conf})
372
+ if features is not None:
373
+ if features not in self.features:
374
+ raise ValueError(
375
+ f"Unsupported features: {features} not in "
376
+ f"{{{','.join(self.features)}}}"
377
+ )
378
+ for k, v in self.features[features].items():
379
+ setattr(conf, k, v)
380
+
381
+ if conf.input_dim != conf.descriptor_dim:
382
+ self.input_proj = nn.Linear(conf.input_dim, conf.descriptor_dim, bias=True)
383
+ else:
384
+ self.input_proj = nn.Identity()
385
+
386
+ head_dim = conf.descriptor_dim // conf.num_heads
387
+ self.posenc = LearnableFourierPositionalEncoding(
388
+ 2 + 2 * self.conf.add_scale_ori, head_dim, head_dim
389
+ )
390
+
391
+ h, n, d = conf.num_heads, conf.n_layers, conf.descriptor_dim
392
+
393
+ self.transformers = nn.ModuleList(
394
+ [TransformerLayer(d, h, conf.flash) for _ in range(n)]
395
+ )
396
+
397
+ self.log_assignment = nn.ModuleList([MatchAssignment(d) for _ in range(n)])
398
+ self.token_confidence = nn.ModuleList(
399
+ [TokenConfidence(d) for _ in range(n - 1)]
400
+ )
401
+ self.register_buffer(
402
+ "confidence_thresholds",
403
+ torch.Tensor(
404
+ [self.confidence_threshold(i) for i in range(self.conf.n_layers)]
405
+ ),
406
+ )
407
+
408
+ state_dict = None
409
+ if features is not None:
410
+ fname = f"{conf.weights}_{self.version.replace('.', '-')}.pth"
411
+ state_dict = torch.hub.load_state_dict_from_url(
412
+ self.url.format(self.version, features), file_name=fname
413
+ )
414
+ self.load_state_dict(state_dict, strict=False)
415
+ elif conf.weights is not None:
416
+ path = Path(__file__).parent
417
+ path = path / "weights/{}.pth".format(self.conf.weights)
418
+ state_dict = torch.load(str(path), map_location="cpu")
419
+
420
+ if state_dict:
421
+ # rename old state dict entries
422
+ for i in range(self.conf.n_layers):
423
+ pattern = f"self_attn.{i}", f"transformers.{i}.self_attn"
424
+ state_dict = {k.replace(*pattern): v for k, v in state_dict.items()}
425
+ pattern = f"cross_attn.{i}", f"transformers.{i}.cross_attn"
426
+ state_dict = {k.replace(*pattern): v for k, v in state_dict.items()}
427
+ self.load_state_dict(state_dict, strict=False)
428
+
429
+ # static lengths LightGlue is compiled for (only used with torch.compile)
430
+ self.static_lengths = None
431
+
432
+ def compile(
433
+ self, mode="reduce-overhead", static_lengths=[256, 512, 768, 1024, 1280, 1536]
434
+ ):
435
+ if self.conf.width_confidence != -1:
436
+ warnings.warn(
437
+ "Point pruning is partially disabled for compiled forward.",
438
+ stacklevel=2,
439
+ )
440
+
441
+ torch._inductor.cudagraph_mark_step_begin()
442
+ for i in range(self.conf.n_layers):
443
+ self.transformers[i].masked_forward = torch.compile(
444
+ self.transformers[i].masked_forward, mode=mode, fullgraph=True
445
+ )
446
+
447
+ self.static_lengths = static_lengths
448
+
449
+ def forward(self, data: dict) -> dict:
450
+ """
451
+ Match keypoints and descriptors between two images
452
+
453
+ Input (dict):
454
+ image0: dict
455
+ keypoints: [B x M x 2]
456
+ descriptors: [B x M x D]
457
+ image: [B x C x H x W] or image_size: [B x 2]
458
+ image1: dict
459
+ keypoints: [B x N x 2]
460
+ descriptors: [B x N x D]
461
+ image: [B x C x H x W] or image_size: [B x 2]
462
+ Output (dict):
463
+ matches0: [B x M]
464
+ matching_scores0: [B x M]
465
+ matches1: [B x N]
466
+ matching_scores1: [B x N]
467
+ matches: List[[Si x 2]]
468
+ scores: List[[Si]]
469
+ stop: int
470
+ prune0: [B x M]
471
+ prune1: [B x N]
472
+ """
473
+ with torch.autocast(enabled=self.conf.mp, device_type="cuda"):
474
+ return self._forward(data)
475
+
476
+ def _forward(self, data: dict) -> dict:
477
+ for key in self.required_data_keys:
478
+ assert key in data, f"Missing key {key} in data"
479
+ data0, data1 = data["image0"], data["image1"]
480
+ kpts0, kpts1 = data0["keypoints"], data1["keypoints"]
481
+ b, m, _ = kpts0.shape
482
+ b, n, _ = kpts1.shape
483
+ device = kpts0.device
484
+ size0, size1 = data0.get("image_size"), data1.get("image_size")
485
+ kpts0 = normalize_keypoints(kpts0, size0).clone()
486
+ kpts1 = normalize_keypoints(kpts1, size1).clone()
487
+
488
+ if self.conf.add_scale_ori:
489
+ kpts0 = torch.cat(
490
+ [kpts0] + [data0[k].unsqueeze(-1) for k in ("scales", "oris")], -1
491
+ )
492
+ kpts1 = torch.cat(
493
+ [kpts1] + [data1[k].unsqueeze(-1) for k in ("scales", "oris")], -1
494
+ )
495
+ desc0 = data0["descriptors"].detach().contiguous()
496
+ desc1 = data1["descriptors"].detach().contiguous()
497
+
498
+ assert desc0.shape[-1] == self.conf.input_dim
499
+ assert desc1.shape[-1] == self.conf.input_dim
500
+
501
+ if torch.is_autocast_enabled():
502
+ desc0 = desc0.half()
503
+ desc1 = desc1.half()
504
+
505
+ mask0, mask1 = None, None
506
+ c = max(m, n)
507
+ do_compile = self.static_lengths and c <= max(self.static_lengths)
508
+ if do_compile:
509
+ kn = min([k for k in self.static_lengths if k >= c])
510
+ desc0, mask0 = pad_to_length(desc0, kn)
511
+ desc1, mask1 = pad_to_length(desc1, kn)
512
+ kpts0, _ = pad_to_length(kpts0, kn)
513
+ kpts1, _ = pad_to_length(kpts1, kn)
514
+ desc0 = self.input_proj(desc0)
515
+ desc1 = self.input_proj(desc1)
516
+ # cache positional embeddings
517
+ encoding0 = self.posenc(kpts0)
518
+ encoding1 = self.posenc(kpts1)
519
+
520
+ # GNN + final_proj + assignment
521
+ do_early_stop = self.conf.depth_confidence > 0
522
+ do_point_pruning = self.conf.width_confidence > 0 and not do_compile
523
+ pruning_th = self.pruning_min_kpts(device)
524
+ if do_point_pruning:
525
+ ind0 = torch.arange(0, m, device=device)[None]
526
+ ind1 = torch.arange(0, n, device=device)[None]
527
+ # We store the index of the layer at which pruning is detected.
528
+ prune0 = torch.ones_like(ind0)
529
+ prune1 = torch.ones_like(ind1)
530
+ token0, token1 = None, None
531
+ for i in range(self.conf.n_layers):
532
+ if desc0.shape[1] == 0 or desc1.shape[1] == 0: # no keypoints
533
+ break
534
+ desc0, desc1 = self.transformers[i](
535
+ desc0, desc1, encoding0, encoding1, mask0=mask0, mask1=mask1
536
+ )
537
+ if i == self.conf.n_layers - 1:
538
+ continue # no early stopping or adaptive width at last layer
539
+
540
+ if do_early_stop:
541
+ token0, token1 = self.token_confidence[i](desc0, desc1)
542
+ if self.check_if_stop(token0[..., :m], token1[..., :n], i, m + n):
543
+ break
544
+ if do_point_pruning and desc0.shape[-2] > pruning_th:
545
+ scores0 = self.log_assignment[i].get_matchability(desc0)
546
+ prunemask0 = self.get_pruning_mask(token0, scores0, i)
547
+ keep0 = torch.where(prunemask0)[1]
548
+ ind0 = ind0.index_select(1, keep0)
549
+ desc0 = desc0.index_select(1, keep0)
550
+ encoding0 = encoding0.index_select(-2, keep0)
551
+ prune0[:, ind0] += 1
552
+ if do_point_pruning and desc1.shape[-2] > pruning_th:
553
+ scores1 = self.log_assignment[i].get_matchability(desc1)
554
+ prunemask1 = self.get_pruning_mask(token1, scores1, i)
555
+ keep1 = torch.where(prunemask1)[1]
556
+ ind1 = ind1.index_select(1, keep1)
557
+ desc1 = desc1.index_select(1, keep1)
558
+ encoding1 = encoding1.index_select(-2, keep1)
559
+ prune1[:, ind1] += 1
560
+
561
+ if desc0.shape[1] == 0 or desc1.shape[1] == 0: # no keypoints
562
+ m0 = desc0.new_full((b, m), -1, dtype=torch.long)
563
+ m1 = desc1.new_full((b, n), -1, dtype=torch.long)
564
+ mscores0 = desc0.new_zeros((b, m))
565
+ mscores1 = desc1.new_zeros((b, n))
566
+ matches = desc0.new_empty((b, 0, 2), dtype=torch.long)
567
+ mscores = desc0.new_empty((b, 0))
568
+ if not do_point_pruning:
569
+ prune0 = torch.ones_like(mscores0) * self.conf.n_layers
570
+ prune1 = torch.ones_like(mscores1) * self.conf.n_layers
571
+ return {
572
+ "matches0": m0,
573
+ "matches1": m1,
574
+ "matching_scores0": mscores0,
575
+ "matching_scores1": mscores1,
576
+ "stop": i + 1,
577
+ "matches": matches,
578
+ "scores": mscores,
579
+ "prune0": prune0,
580
+ "prune1": prune1,
581
+ }
582
+
583
+ desc0, desc1 = desc0[..., :m, :], desc1[..., :n, :] # remove padding
584
+ scores, _ = self.log_assignment[i](desc0, desc1)
585
+ m0, m1, mscores0, mscores1 = filter_matches(scores, self.conf.filter_threshold)
586
+ matches, mscores = [], []
587
+ for k in range(b):
588
+ valid = m0[k] > -1
589
+ m_indices_0 = torch.where(valid)[0]
590
+ m_indices_1 = m0[k][valid]
591
+ if do_point_pruning:
592
+ m_indices_0 = ind0[k, m_indices_0]
593
+ m_indices_1 = ind1[k, m_indices_1]
594
+ matches.append(torch.stack([m_indices_0, m_indices_1], -1))
595
+ mscores.append(mscores0[k][valid])
596
+
597
+ # TODO: Remove when hloc switches to the compact format.
598
+ if do_point_pruning:
599
+ m0_ = torch.full((b, m), -1, device=m0.device, dtype=m0.dtype)
600
+ m1_ = torch.full((b, n), -1, device=m1.device, dtype=m1.dtype)
601
+ m0_[:, ind0] = torch.where(m0 == -1, -1, ind1.gather(1, m0.clamp(min=0)))
602
+ m1_[:, ind1] = torch.where(m1 == -1, -1, ind0.gather(1, m1.clamp(min=0)))
603
+ mscores0_ = torch.zeros((b, m), device=mscores0.device)
604
+ mscores1_ = torch.zeros((b, n), device=mscores1.device)
605
+ mscores0_[:, ind0] = mscores0
606
+ mscores1_[:, ind1] = mscores1
607
+ m0, m1, mscores0, mscores1 = m0_, m1_, mscores0_, mscores1_
608
+ else:
609
+ prune0 = torch.ones_like(mscores0) * self.conf.n_layers
610
+ prune1 = torch.ones_like(mscores1) * self.conf.n_layers
611
+
612
+ return {
613
+ "matches0": m0,
614
+ "matches1": m1,
615
+ "matching_scores0": mscores0,
616
+ "matching_scores1": mscores1,
617
+ "stop": i + 1,
618
+ "matches": matches,
619
+ "scores": mscores,
620
+ "prune0": prune0,
621
+ "prune1": prune1,
622
+ }
623
+
624
+ def confidence_threshold(self, layer_index: int) -> float:
625
+ """scaled confidence threshold"""
626
+ threshold = 0.8 + 0.1 * np.exp(-4.0 * layer_index / self.conf.n_layers)
627
+ return np.clip(threshold, 0, 1)
628
+
629
+ def get_pruning_mask(
630
+ self, confidences: torch.Tensor, scores: torch.Tensor, layer_index: int
631
+ ) -> torch.Tensor:
632
+ """mask points which should be removed"""
633
+ keep = scores > (1 - self.conf.width_confidence)
634
+ if confidences is not None: # Low-confidence points are never pruned.
635
+ keep |= confidences <= self.confidence_thresholds[layer_index]
636
+ return keep
637
+
638
+ def check_if_stop(
639
+ self,
640
+ confidences0: torch.Tensor,
641
+ confidences1: torch.Tensor,
642
+ layer_index: int,
643
+ num_points: int,
644
+ ) -> torch.Tensor:
645
+ """evaluate stopping condition"""
646
+ confidences = torch.cat([confidences0, confidences1], -1)
647
+ threshold = self.confidence_thresholds[layer_index]
648
+ ratio_confident = 1.0 - (confidences < threshold).float().sum() / num_points
649
+ return ratio_confident > self.conf.depth_confidence
650
+
651
+ def pruning_min_kpts(self, device: torch.device):
652
+ if self.conf.flash and FLASH_AVAILABLE and device.type == "cuda":
653
+ return self.pruning_keypoint_thresholds["flash"]
654
+ else:
655
+ return self.pruning_keypoint_thresholds[device.type]
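At inference time the matcher takes two feature dicts and returns both the padded per-keypoint format (`matches0`, `matching_scores0`, ...) and the compact pair list (`matches`, `scores`). A hedged sketch with random SuperPoint-style inputs (256-D descriptors); pretrained weights are downloaded in `__init__`:

import torch
from dad.detectors.third_party.lightglue import LightGlue

matcher = LightGlue(features="superpoint").eval()

def fake_feats(n=512):
    return {
        "keypoints": torch.rand(1, n, 2) * torch.tensor([640.0, 480.0]),
        "descriptors": torch.rand(1, n, 256),
        "image_size": torch.tensor([[640.0, 480.0]]),
    }

with torch.inference_mode():
    out = matcher({"image0": fake_feats(), "image1": fake_feats()})
matches = out["matches"][0]  # K x 2 indices into the two keypoint sets
scores = out["scores"][0]    # K matching confidences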
imcui/third_party/dad/dad/detectors/third_party/lightglue/sift.py ADDED
@@ -0,0 +1,216 @@
1
+ import warnings
2
+
3
+ import cv2
4
+ import numpy as np
5
+ import torch
6
+ from kornia.color import rgb_to_grayscale
7
+ from packaging import version
8
+
9
+ try:
10
+ import pycolmap
11
+ except ImportError:
12
+ pycolmap = None
13
+
14
+ from .utils import Extractor
15
+
16
+
17
+ def filter_dog_point(points, scales, angles, image_shape, nms_radius, scores=None):
18
+ h, w = image_shape
19
+ ij = np.round(points - 0.5).astype(int).T[::-1]
20
+
21
+ # Remove duplicate points (identical coordinates).
22
+ # Pick highest scale or score
23
+ s = scales if scores is None else scores
24
+ buffer = np.zeros((h, w))
25
+ np.maximum.at(buffer, tuple(ij), s)
26
+ keep = np.where(buffer[tuple(ij)] == s)[0]
27
+
28
+ # Pick lowest angle (arbitrary).
29
+ ij = ij[:, keep]
30
+ buffer[:] = np.inf
31
+ o_abs = np.abs(angles[keep])
32
+ np.minimum.at(buffer, tuple(ij), o_abs)
33
+ mask = buffer[tuple(ij)] == o_abs
34
+ ij = ij[:, mask]
35
+ keep = keep[mask]
36
+
37
+ if nms_radius > 0:
38
+ # Apply NMS on the remaining points
39
+ buffer[:] = 0
40
+ buffer[tuple(ij)] = s[keep] # scores or scale
41
+
42
+ local_max = torch.nn.functional.max_pool2d(
43
+ torch.from_numpy(buffer).unsqueeze(0),
44
+ kernel_size=nms_radius * 2 + 1,
45
+ stride=1,
46
+ padding=nms_radius,
47
+ ).squeeze(0)
48
+ is_local_max = buffer == local_max.numpy()
49
+ keep = keep[is_local_max[tuple(ij)]]
50
+ return keep
51
+
52
+
53
+ def sift_to_rootsift(x: torch.Tensor, eps=1e-6) -> torch.Tensor:
54
+ x = torch.nn.functional.normalize(x, p=1, dim=-1, eps=eps)
55
+ x.clip_(min=eps).sqrt_()
56
+ return torch.nn.functional.normalize(x, p=2, dim=-1, eps=eps)
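# Illustration only: RootSIFT = L1-normalise, element-wise square root, then
# L2-normalise; dot products then behave like the Hellinger kernel on the
# original descriptor histograms, and the result is always unit-length.
example_desc = torch.rand(4, 128)
example_root = sift_to_rootsift(example_desc)
assert torch.allclose(example_root.norm(dim=-1), torch.ones(4), atol=1e-5)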
57
+
58
+
59
+ def run_opencv_sift(features: cv2.Feature2D, image: np.ndarray) -> np.ndarray:
60
+ """
61
+ Detect keypoints using OpenCV Detector.
62
+ Optionally, perform description.
63
+ Args:
64
+ features: OpenCV based keypoints detector and descriptor
65
+ image: Grayscale image of uint8 data type
66
+ Returns:
67
+ keypoints: 1D array of detected cv2.KeyPoint
68
+ scores: 1D array of responses
69
+ descriptors: 1D array of descriptors
70
+ """
71
+ detections, descriptors = features.detectAndCompute(image, None)
72
+ points = np.array([k.pt for k in detections], dtype=np.float32)
73
+ scores = np.array([k.response for k in detections], dtype=np.float32)
74
+ scales = np.array([k.size for k in detections], dtype=np.float32)
75
+ angles = np.deg2rad(np.array([k.angle for k in detections], dtype=np.float32))
76
+ return points, scores, scales, angles, descriptors
77
+
78
+
79
+ class SIFT(Extractor):
80
+ default_conf = {
81
+ "rootsift": True,
82
+ "nms_radius": 0, # None to disable filtering entirely.
83
+ "max_num_keypoints": 4096,
84
+ "backend": "opencv", # in {opencv, pycolmap, pycolmap_cpu, pycolmap_cuda}
85
+ "detection_threshold": 0.0066667, # from COLMAP
86
+ "edge_threshold": 10,
87
+ "first_octave": -1, # only used by pycolmap, the default of COLMAP
88
+ "num_octaves": 4,
89
+ }
90
+
91
+ preprocess_conf = {
92
+ "resize": 1024,
93
+ }
94
+
95
+ required_data_keys = ["image"]
96
+
97
+ def __init__(self, **conf):
98
+ super().__init__(**conf) # Update with default configuration.
99
+ backend = self.conf.backend
100
+ if backend.startswith("pycolmap"):
101
+ if pycolmap is None:
102
+ raise ImportError(
103
+ "Cannot find module pycolmap: install it with pip"
104
+ "or use backend=opencv."
105
+ )
106
+ options = {
107
+ "peak_threshold": self.conf.detection_threshold,
108
+ "edge_threshold": self.conf.edge_threshold,
109
+ "first_octave": self.conf.first_octave,
110
+ "num_octaves": self.conf.num_octaves,
111
+ "normalization": pycolmap.Normalization.L2, # L1_ROOT is buggy.
112
+ }
113
+ device = (
114
+ "auto" if backend == "pycolmap" else backend.replace("pycolmap_", "")
115
+ )
116
+ if (
117
+ backend == "pycolmap_cpu" or not pycolmap.has_cuda
118
+ ) and pycolmap.__version__ < "0.5.0":
119
+ warnings.warn(
120
+ "The pycolmap CPU SIFT is buggy in version < 0.5.0, "
121
+ "consider upgrading pycolmap or use the CUDA version.",
122
+ stacklevel=1,
123
+ )
124
+ else:
125
+ options["max_num_features"] = self.conf.max_num_keypoints
126
+ self.sift = pycolmap.Sift(options=options, device=device)
127
+ elif backend == "opencv":
128
+ self.sift = cv2.SIFT_create(
129
+ contrastThreshold=self.conf.detection_threshold,
130
+ nfeatures=self.conf.max_num_keypoints,
131
+ edgeThreshold=self.conf.edge_threshold,
132
+ nOctaveLayers=self.conf.num_octaves,
133
+ )
134
+ else:
135
+ backends = {"opencv", "pycolmap", "pycolmap_cpu", "pycolmap_cuda"}
136
+ raise ValueError(
137
+ f"Unknown backend: {backend} not in {{{','.join(backends)}}}."
138
+ )
139
+
140
+ def extract_single_image(self, image: torch.Tensor):
141
+ image_np = image.cpu().numpy().squeeze(0)
142
+
143
+ if self.conf.backend.startswith("pycolmap"):
144
+ if version.parse(pycolmap.__version__) >= version.parse("0.5.0"):
145
+ detections, descriptors = self.sift.extract(image_np)
146
+ scores = None # Scores are not exposed by COLMAP anymore.
147
+ else:
148
+ detections, scores, descriptors = self.sift.extract(image_np)
149
+ keypoints = detections[:, :2] # Keep only (x, y).
150
+ scales, angles = detections[:, -2:].T
151
+ if scores is not None and (
152
+ self.conf.backend == "pycolmap_cpu" or not pycolmap.has_cuda
153
+ ):
154
+ # Set the scores as a combination of abs. response and scale.
155
+ scores = np.abs(scores) * scales
156
+ elif self.conf.backend == "opencv":
157
+ # TODO: Check if opencv keypoints are already in corner convention
158
+ keypoints, scores, scales, angles, descriptors = run_opencv_sift(
159
+ self.sift, (image_np * 255.0).astype(np.uint8)
160
+ )
161
+ pred = {
162
+ "keypoints": keypoints,
163
+ "scales": scales,
164
+ "oris": angles,
165
+ "descriptors": descriptors,
166
+ }
167
+ if scores is not None:
168
+ pred["keypoint_scores"] = scores
169
+
170
+ # sometimes pycolmap returns points outside the image. We remove them
171
+ if self.conf.backend.startswith("pycolmap"):
172
+ is_inside = (
173
+ pred["keypoints"] + 0.5 < np.array([image_np.shape[-2:][::-1]])
174
+ ).all(-1)
175
+ pred = {k: v[is_inside] for k, v in pred.items()}
176
+
177
+ if self.conf.nms_radius is not None:
178
+ keep = filter_dog_point(
179
+ pred["keypoints"],
180
+ pred["scales"],
181
+ pred["oris"],
182
+ image_np.shape,
183
+ self.conf.nms_radius,
184
+ scores=pred.get("keypoint_scores"),
185
+ )
186
+ pred = {k: v[keep] for k, v in pred.items()}
187
+
188
+ pred = {k: torch.from_numpy(v) for k, v in pred.items()}
189
+ if scores is not None:
190
+ # Keep the k keypoints with highest score
191
+ num_points = self.conf.max_num_keypoints
192
+ if num_points is not None and len(pred["keypoints"]) > num_points:
193
+ indices = torch.topk(pred["keypoint_scores"], num_points).indices
194
+ pred = {k: v[indices] for k, v in pred.items()}
195
+
196
+ return pred
197
+
198
+ def forward(self, data: dict) -> dict:
199
+ image = data["image"]
200
+ if image.shape[1] == 3:
201
+ image = rgb_to_grayscale(image)
202
+ device = image.device
203
+ image = image.cpu()
204
+ pred = []
205
+ for k in range(len(image)):
206
+ img = image[k]
207
+ if "image_size" in data.keys():
208
+ # avoid extracting points in padded areas
209
+ w, h = data["image_size"][k]
210
+ img = img[:, :h, :w]
211
+ p = self.extract_single_image(img)
212
+ pred.append(p)
213
+ pred = {k: torch.stack([p[k] for p in pred], 0).to(device) for k in pred[0]}
214
+ if self.conf.rootsift:
215
+ pred["descriptors"] = sift_to_rootsift(pred["descriptors"])
216
+ return pred
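
A minimal usage sketch for the `SIFT` extractor above, driven through `Extractor.extract` (defined in `lightglue/utils.py` later in this diff). The `lightglue` import path is an assumption (upstream LightGlue re-exports `SIFT` from its `__init__`), and the image path is hypothetical.

```python
# Minimal sketch, assuming the bundled lightglue package re-exports SIFT in its
# __init__ (as upstream LightGlue does); the image path is hypothetical.
from lightglue import SIFT
from lightglue.utils import load_image

extractor = SIFT(backend="opencv", max_num_keypoints=2048).eval()
image = load_image("assets/example.jpg")   # CxHxW float tensor in [0, 1]
feats = extractor.extract(image)           # Extractor.extract: resize, forward, rescale keypoints
print(feats["keypoints"].shape, feats["descriptors"].shape)
```
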
imcui/third_party/dad/dad/detectors/third_party/lightglue/superpoint.py ADDED
@@ -0,0 +1,233 @@
1
+ # %BANNER_BEGIN%
2
+ # ---------------------------------------------------------------------
3
+ # %COPYRIGHT_BEGIN%
4
+ #
5
+ # Magic Leap, Inc. ("COMPANY") CONFIDENTIAL
6
+ #
7
+ # Unpublished Copyright (c) 2020
8
+ # Magic Leap, Inc., All Rights Reserved.
9
+ #
10
+ # NOTICE: All information contained herein is, and remains the property
11
+ # of COMPANY. The intellectual and technical concepts contained herein
12
+ # are proprietary to COMPANY and may be covered by U.S. and Foreign
13
+ # Patents, patents in process, and are protected by trade secret or
14
+ # copyright law. Dissemination of this information or reproduction of
15
+ # this material is strictly forbidden unless prior written permission is
16
+ # obtained from COMPANY. Access to the source code contained herein is
17
+ # hereby forbidden to anyone except current COMPANY employees, managers
18
+ # or contractors who have executed Confidentiality and Non-disclosure
19
+ # agreements explicitly covering such access.
20
+ #
21
+ # The copyright notice above does not evidence any actual or intended
22
+ # publication or disclosure of this source code, which includes
23
+ # information that is confidential and/or proprietary, and is a trade
24
+ # secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION,
25
+ # PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS
26
+ # SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS
27
+ # STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND
28
+ # INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE
29
+ # CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS
30
+ # TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE,
31
+ # USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART.
32
+ #
33
+ # %COPYRIGHT_END%
34
+ # ----------------------------------------------------------------------
35
+ # %AUTHORS_BEGIN%
36
+ #
37
+ # Originating Authors: Paul-Edouard Sarlin
38
+ #
39
+ # %AUTHORS_END%
40
+ # --------------------------------------------------------------------*/
41
+ # %BANNER_END%
42
+
43
+ # Adapted by Remi Pautrat, Philipp Lindenberger
44
+
45
+ import torch
46
+ from kornia.color import rgb_to_grayscale
47
+ from torch import nn
48
+
49
+ from .utils import Extractor
50
+
51
+
52
+ def simple_nms(scores, nms_radius: int):
53
+ """Fast Non-maximum suppression to remove nearby points"""
54
+ assert nms_radius >= 0
55
+
56
+ def max_pool(x):
57
+ return torch.nn.functional.max_pool2d(
58
+ x, kernel_size=nms_radius * 2 + 1, stride=1, padding=nms_radius
59
+ )
60
+
61
+ zeros = torch.zeros_like(scores)
62
+ max_mask = scores == max_pool(scores)
63
+ for _ in range(2):
64
+ supp_mask = max_pool(max_mask.float()) > 0
65
+ supp_scores = torch.where(supp_mask, zeros, scores)
66
+ new_max_mask = supp_scores == max_pool(supp_scores)
67
+ max_mask = max_mask | (new_max_mask & (~supp_mask))
68
+ return torch.where(max_mask, scores, zeros)
69
+
70
+
71
+ def top_k_keypoints(keypoints, scores, k):
72
+ if k >= len(keypoints):
73
+ return keypoints, scores
74
+ scores, indices = torch.topk(scores, k, dim=0, sorted=True)
75
+ return keypoints[indices], scores
76
+
77
+
78
+ def sample_descriptors(keypoints, descriptors, s: int = 8):
79
+ """Interpolate descriptors at keypoint locations"""
80
+ b, c, h, w = descriptors.shape
81
+ keypoints = keypoints - s / 2 + 0.5
82
+ keypoints /= torch.tensor(
83
+ [(w * s - s / 2 - 0.5), (h * s - s / 2 - 0.5)],
84
+ ).to(keypoints)[None]
85
+ keypoints = keypoints * 2 - 1 # normalize to (-1, 1)
86
+ args = {"align_corners": True} if torch.__version__ >= "1.3" else {}
87
+ descriptors = torch.nn.functional.grid_sample(
88
+ descriptors, keypoints.view(b, 1, -1, 2), mode="bilinear", **args
89
+ )
90
+ descriptors = torch.nn.functional.normalize(
91
+ descriptors.reshape(b, c, -1), p=2, dim=1
92
+ )
93
+ return descriptors
94
+
95
+
96
+ class SuperPoint(Extractor):
97
+ """SuperPoint Convolutional Detector and Descriptor
98
+
99
+ SuperPoint: Self-Supervised Interest Point Detection and
100
+ Description. Daniel DeTone, Tomasz Malisiewicz, and Andrew
101
+ Rabinovich. In CVPRW, 2018. https://arxiv.org/abs/1712.07629
102
+
103
+ """
104
+
105
+ default_conf = {
106
+ "descriptor_dim": 256,
107
+ "nms_radius": 4,
108
+ "max_num_keypoints": None,
109
+ # TODO: detection threshold
110
+ "detection_threshold": 0.0005,
111
+ "remove_borders": 4,
112
+ }
113
+
114
+ preprocess_conf = {
115
+ "resize": 1024,
116
+ }
117
+
118
+ required_data_keys = ["image"]
119
+
120
+ def __init__(self, **conf):
121
+ super().__init__(**conf) # Update with default configuration.
122
+ self.relu = nn.ReLU(inplace=True)
123
+ self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
124
+ c1, c2, c3, c4, c5 = 64, 64, 128, 128, 256
125
+
126
+ self.conv1a = nn.Conv2d(1, c1, kernel_size=3, stride=1, padding=1)
127
+ self.conv1b = nn.Conv2d(c1, c1, kernel_size=3, stride=1, padding=1)
128
+ self.conv2a = nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1)
129
+ self.conv2b = nn.Conv2d(c2, c2, kernel_size=3, stride=1, padding=1)
130
+ self.conv3a = nn.Conv2d(c2, c3, kernel_size=3, stride=1, padding=1)
131
+ self.conv3b = nn.Conv2d(c3, c3, kernel_size=3, stride=1, padding=1)
132
+ self.conv4a = nn.Conv2d(c3, c4, kernel_size=3, stride=1, padding=1)
133
+ self.conv4b = nn.Conv2d(c4, c4, kernel_size=3, stride=1, padding=1)
134
+
135
+ self.convPa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1)
136
+ self.convPb = nn.Conv2d(c5, 65, kernel_size=1, stride=1, padding=0)
137
+
138
+ self.convDa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1)
139
+ self.convDb = nn.Conv2d(
140
+ c5, self.conf.descriptor_dim, kernel_size=1, stride=1, padding=0
141
+ )
142
+
143
+ url = "https://github.com/cvg/LightGlue/releases/download/v0.1_arxiv/superpoint_v1.pth" # noqa
144
+ self.load_state_dict(torch.hub.load_state_dict_from_url(url))
145
+
146
+ if self.conf.max_num_keypoints is not None and self.conf.max_num_keypoints <= 0:
147
+ raise ValueError("max_num_keypoints must be positive or None")
148
+
149
+ def forward(self, data: dict) -> dict:
150
+ """Compute keypoints, scores, descriptors for image"""
151
+ for key in self.required_data_keys:
152
+ assert key in data, f"Missing key {key} in data"
153
+ image = data["image"]
154
+ if image.shape[1] == 3:
155
+ image = rgb_to_grayscale(image)
156
+
157
+ # Shared Encoder
158
+ x = self.relu(self.conv1a(image))
159
+ x = self.relu(self.conv1b(x))
160
+ x = self.pool(x)
161
+ x = self.relu(self.conv2a(x))
162
+ x = self.relu(self.conv2b(x))
163
+ x = self.pool(x)
164
+ x = self.relu(self.conv3a(x))
165
+ x = self.relu(self.conv3b(x))
166
+ x = self.pool(x)
167
+ x = self.relu(self.conv4a(x))
168
+ x = self.relu(self.conv4b(x))
169
+
170
+ # Compute the dense keypoint scores
171
+ cPa = self.relu(self.convPa(x))
172
+ scores = self.convPb(cPa)
173
+ scores = torch.nn.functional.softmax(scores, 1)[:, :-1]
174
+ b, _, h, w = scores.shape
175
+ scores = scores.permute(0, 2, 3, 1).reshape(b, h, w, 8, 8)
176
+ scores = scores.permute(0, 1, 3, 2, 4).reshape(b, h * 8, w * 8)
177
+ scores = simple_nms(scores, self.conf.nms_radius)
178
+
179
+ # Discard keypoints near the image borders
180
+ if self.conf.remove_borders:
181
+ pad = self.conf.remove_borders
182
+ scores[:, :pad] = -1
183
+ scores[:, :, :pad] = -1
184
+ scores[:, -pad:] = -1
185
+ scores[:, :, -pad:] = -1
186
+
187
+ # Extract keypoints
188
+ best_kp = torch.where(scores > self.conf.detection_threshold)
189
+ scores = scores[best_kp]
190
+
191
+ # Separate into batches
192
+ keypoints = [
193
+ torch.stack(best_kp[1:3], dim=-1)[best_kp[0] == i] for i in range(b)
194
+ ]
195
+ scores = [scores[best_kp[0] == i] for i in range(b)]
196
+
197
+ # Keep the k keypoints with highest score
198
+ if self.conf.max_num_keypoints is not None:
199
+ keypoints, scores = list(
200
+ zip(
201
+ *[
202
+ top_k_keypoints(k, s, self.conf.max_num_keypoints)
203
+ for k, s in zip(keypoints, scores)
204
+ ]
205
+ )
206
+ )
207
+
208
+ # Convert (h, w) to (x, y)
209
+ keypoints = [torch.flip(k, [1]).float() for k in keypoints]
210
+
211
+ # Compute the dense descriptors
212
+ cDa = self.relu(self.convDa(x))
213
+ descriptors = self.convDb(cDa)
214
+ descriptors = torch.nn.functional.normalize(descriptors, p=2, dim=1)
215
+
216
+ # Extract descriptors
217
+ descriptors = [
218
+ sample_descriptors(k[None], d[None], 8)[0]
219
+ for k, d in zip(keypoints, descriptors)
220
+ ]
221
+
222
+ return {
223
+ "keypoints": torch.stack(keypoints, 0),
224
+ "keypoint_scores": torch.stack(scores, 0),
225
+ "descriptors": torch.stack(descriptors, 0).transpose(-1, -2).contiguous(),
226
+ }
227
+
228
+
229
+ class ReinforcedFP(SuperPoint):
230
+ def __init__(self, **conf):
231
+ super().__init__(**conf) # Update with default configuration.
232
+ url = "https://github.com/aritrabhowmik/Reinforced-Feature-Points/raw/refs/heads/master/weights/baseline_mixed_loss.pth" # noqa
233
+ self.load_state_dict(torch.hub.load_state_dict_from_url(url))
imcui/third_party/dad/dad/detectors/third_party/lightglue/utils.py ADDED
@@ -0,0 +1,158 @@
1
+ import collections.abc as collections
2
+ from pathlib import Path
3
+ from types import SimpleNamespace
4
+ from typing import Callable, List, Optional, Tuple, Union
5
+
6
+ import cv2
7
+ import kornia
8
+ import numpy as np
9
+ import torch
10
+
11
+
12
+ class ImagePreprocessor:
13
+ default_conf = {
14
+ "resize": None, # target edge length, None for no resizing
15
+ "side": "long",
16
+ "interpolation": "bilinear",
17
+ "align_corners": None,
18
+ "antialias": True,
19
+ }
20
+
21
+ def __init__(self, **conf) -> None:
22
+ super().__init__()
23
+ self.conf = {**self.default_conf, **conf}
24
+ self.conf = SimpleNamespace(**self.conf)
25
+
26
+ def __call__(self, img: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
27
+ """Resize and preprocess an image, return image and resize scale"""
28
+ h, w = img.shape[-2:]
29
+ if self.conf.resize is not None:
30
+ img = kornia.geometry.transform.resize(
31
+ img,
32
+ self.conf.resize,
33
+ side=self.conf.side,
34
+ antialias=self.conf.antialias,
35
+ align_corners=self.conf.align_corners,
36
+ )
37
+ scale = torch.Tensor([img.shape[-1] / w, img.shape[-2] / h]).to(img)
38
+ return img, scale
39
+
40
+
41
+ def map_tensor(input_, func: Callable):
42
+ string_classes = (str, bytes)
43
+ if isinstance(input_, string_classes):
44
+ return input_
45
+ elif isinstance(input_, collections.Mapping):
46
+ return {k: map_tensor(sample, func) for k, sample in input_.items()}
47
+ elif isinstance(input_, collections.Sequence):
48
+ return [map_tensor(sample, func) for sample in input_]
49
+ elif isinstance(input_, torch.Tensor):
50
+ return func(input_)
51
+ else:
52
+ return input_
53
+
54
+
55
+ def batch_to_device(batch: dict, device: str = "cpu", non_blocking: bool = True):
56
+ """Move batch (dict) to device"""
57
+
58
+ def _func(tensor):
59
+ return tensor.to(device=device, non_blocking=non_blocking).detach()
60
+
61
+ return map_tensor(batch, _func)
62
+
63
+
64
+ def rbd(data: dict) -> dict:
65
+ """Remove batch dimension from elements in data"""
66
+ return {
67
+ k: v[0] if isinstance(v, (torch.Tensor, np.ndarray, list)) else v
68
+ for k, v in data.items()
69
+ }
70
+
71
+
72
+ def numpy_image_to_torch(image: np.ndarray) -> torch.Tensor:
73
+ """Normalize the image tensor and reorder the dimensions."""
74
+ if image.ndim == 3:
75
+ image = image.transpose((2, 0, 1)) # HxWxC to CxHxW
76
+ elif image.ndim == 2:
77
+ image = image[None] # add channel axis
78
+ else:
79
+ raise ValueError(f"Not an image: {image.shape}")
80
+ return torch.tensor(image / 255.0, dtype=torch.float)
81
+
82
+
83
+ def resize_image(
84
+ image: np.ndarray,
85
+ size: Union[List[int], int],
86
+ fn: str = "max",
87
+ interp: Optional[str] = "area",
88
+ ) -> np.ndarray:
89
+ """Resize an image to a fixed size, or according to max or min edge."""
90
+ h, w = image.shape[:2]
91
+
92
+ fn = {"max": max, "min": min}[fn]
93
+ if isinstance(size, int):
94
+ scale = size / fn(h, w)
95
+ h_new, w_new = int(round(h * scale)), int(round(w * scale))
96
+ scale = (w_new / w, h_new / h)
97
+ elif isinstance(size, (tuple, list)):
98
+ h_new, w_new = size
99
+ scale = (w_new / w, h_new / h)
100
+ else:
101
+ raise ValueError(f"Incorrect new size: {size}")
102
+ mode = {
103
+ "linear": cv2.INTER_LINEAR,
104
+ "cubic": cv2.INTER_CUBIC,
105
+ "nearest": cv2.INTER_NEAREST,
106
+ "area": cv2.INTER_AREA,
107
+ }[interp]
108
+ return cv2.resize(image, (w_new, h_new), interpolation=mode), scale
109
+
110
+
111
+ def load_image(path: Path, resize: int = None, **kwargs) -> torch.Tensor:
112
+ if not Path(path).exists():
113
+ raise FileNotFoundError(f"No image at path {path}.")
114
+ mode = cv2.IMREAD_COLOR
115
+ image = cv2.imread(str(path), mode)
116
+ if image is None:
117
+ raise IOError(f"Could not read image at {path}.")
118
+ image = image[..., ::-1]
119
+ if resize is not None:
120
+ image, _ = resize_image(image, resize, **kwargs)
121
+ return numpy_image_to_torch(image)
122
+
123
+
124
+ class Extractor(torch.nn.Module):
125
+ def __init__(self, **conf):
126
+ super().__init__()
127
+ self.conf = SimpleNamespace(**{**self.default_conf, **conf})
128
+
129
+ @torch.no_grad()
130
+ def extract(self, img: torch.Tensor, **conf) -> dict:
131
+ """Perform extraction with online resizing"""
132
+ if img.dim() == 3:
133
+ img = img[None] # add batch dim
134
+ assert img.dim() == 4 and img.shape[0] == 1
135
+ shape = img.shape[-2:][::-1]
136
+ img, scales = ImagePreprocessor(**{**self.preprocess_conf, **conf})(img)
137
+ feats = self.forward({"image": img})
138
+ feats["image_size"] = torch.tensor(shape)[None].to(img).float()
139
+ feats["keypoints"] = (feats["keypoints"] + 0.5) / scales[None] - 0.5
140
+ return feats
141
+
142
+
143
+ def match_pair(
144
+ extractor,
145
+ matcher,
146
+ image0: torch.Tensor,
147
+ image1: torch.Tensor,
148
+ device: str = "cpu",
149
+ **preprocess,
150
+ ):
151
+ """Match a pair of images (image0, image1) with an extractor and matcher"""
152
+ feats0 = extractor.extract(image0, **preprocess)
153
+ feats1 = extractor.extract(image1, **preprocess)
154
+ matches01 = matcher({"image0": feats0, "image1": feats1})
155
+ data = [feats0, feats1, matches01]
156
+ # remove batch dim and move to target device
157
+ feats0, feats1, matches01 = [batch_to_device(rbd(x), device) for x in data]
158
+ return feats0, feats1, matches01
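
A hedged end-to-end sketch of `match_pair`, pairing the SuperPoint extractor above with the bundled LightGlue matcher. The `LightGlue(features=...)` constructor and the `matches01["matches"]` key follow the upstream LightGlue API and are assumptions about the bundled copy; the image paths are hypothetical.

```python
# Sketch under the assumption that the bundled lightglue.py follows the upstream
# LightGlue API (LightGlue(features=...), matches01["matches"] of shape (K, 2)).
from lightglue import LightGlue, SuperPoint
from lightglue.utils import load_image, match_pair

extractor = SuperPoint(max_num_keypoints=2048).eval()
matcher = LightGlue(features="superpoint").eval()
feats0, feats1, matches01 = match_pair(
    extractor, matcher, load_image("a.jpg"), load_image("b.jpg")  # hypothetical image paths
)
matched_kpts0 = feats0["keypoints"][matches01["matches"][..., 0]]  # (K, 2) points in image 0
matched_kpts1 = feats1["keypoints"][matches01["matches"][..., 1]]
```
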
imcui/third_party/dad/dad/detectors/third_party/lightglue_detector.py ADDED
@@ -0,0 +1,42 @@
1
+ from pathlib import Path
2
+ from typing import Union
3
+ import torch
4
+ from .lightglue.utils import load_image
5
+ from dad.utils import (
6
+ get_best_device,
7
+ )
8
+ from dad.types import Detector
9
+
10
+
11
+ class LightGlueDetector(Detector):
12
+ def __init__(self, model, resize=None, **kwargs):
13
+ super().__init__()
14
+ self.model = model(**kwargs).eval().to(get_best_device())
15
+ if resize is not None:
16
+ self.model.preprocess_conf["resize"] = resize
17
+
18
+ @property
19
+ def topleft(self):
20
+ return 0.0
21
+
22
+ def load_image(self, im_path: Union[str, Path]):
23
+ return {"image": load_image(im_path).to(get_best_device())}
24
+
25
+ @torch.inference_mode()
26
+ def detect(
27
+ self,
28
+ batch: dict[str, torch.Tensor],
29
+ *,
30
+ num_keypoints: int,
31
+ return_dense_probs: bool = False,
32
+ ):
33
+ image = batch["image"]
34
+ self.model.conf.max_num_keypoints = num_keypoints
35
+ ret = self.model.extract(image)
36
+ kpts = self.to_normalized_coords(
37
+ ret["keypoints"], ret["image_size"][0, 1], ret["image_size"][0, 0]
38
+ )
39
+ result = {"keypoints": kpts, "keypoint_probs": None}
40
+ if return_dense_probs:
41
+ result["dense_probs"] = ret["dense_probs"] if "dense_probs" in ret else None
42
+ return result
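
A short sketch of how `LightGlueDetector` wraps one of the extractors above behind dad's `Detector` interface. The import locations are assumptions based on this file's relative imports, and the image path is hypothetical.

```python
# Sketch assuming the dad package is installed and exposes these modules as laid
# out in this diff; the image path is hypothetical.
from dad.detectors.third_party.lightglue import SuperPoint          # assumed export
from dad.detectors.third_party.lightglue_detector import LightGlueDetector

detector = LightGlueDetector(SuperPoint, resize=1024)
batch = detector.load_image("assets/example.jpg")   # {"image": CxHxW float tensor on the best device}
out = detector.detect(batch, num_keypoints=2048)
print(out["keypoints"].shape)                       # keypoints in normalized (x, y) coordinates
```
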
imcui/third_party/dad/dad/detectors/third_party/rekd/config.py ADDED
@@ -0,0 +1,206 @@
1
+ import argparse
2
+
3
+ ## for fix seed
4
+ import random
5
+ import torch
6
+ import numpy
7
+
8
+
9
+ def get_config(jupyter=False):
10
+ parser = argparse.ArgumentParser(description="Train REKD Architecture")
11
+
12
+ ## basic configuration
13
+ parser.add_argument(
14
+ "--data_dir",
15
+ type=str,
16
+ default="../ImageNet2012/ILSVRC2012_img_val", # default='path-to-ImageNet',
17
+ help="The root path to the data from which the synthetic dataset will be created.",
18
+ )
19
+ parser.add_argument(
20
+ "--synth_dir",
21
+ type=str,
22
+ default="",
23
+ help="The path to save the generated sythetic image pairs.",
24
+ )
25
+ parser.add_argument(
26
+ "--log_dir",
27
+ type=str,
28
+ default="trained_models/weights",
29
+ help="The path to save the REKD weights.",
30
+ )
31
+ parser.add_argument(
32
+ "--load_dir",
33
+ type=str,
34
+ default="",
35
+ help="Set saved model parameters if resume training is desired.",
36
+ )
37
+ parser.add_argument(
38
+ "--exp_name",
39
+ type=str,
40
+ default="REKD",
41
+ help="The Rotaton-equivaraiant Keypoint Detection (REKD) experiment name",
42
+ )
43
+ ## network architecture
44
+ parser.add_argument(
45
+ "--factor_scaling_pyramid",
46
+ type=float,
47
+ default=1.2,
48
+ help="The scale factor between the multi-scale pyramid levels in the architecture.",
49
+ )
50
+ parser.add_argument(
51
+ "--group_size",
52
+ type=int,
53
+ default=36,
54
+ help="The number of groups for the group convolution.",
55
+ )
56
+ parser.add_argument(
57
+ "--dim_first",
58
+ type=int,
59
+ default=2,
60
+ help="The number of channels of the first layer",
61
+ )
62
+ parser.add_argument(
63
+ "--dim_second",
64
+ type=int,
65
+ default=2,
66
+ help="The number of channels of the second layer",
67
+ )
68
+ parser.add_argument(
69
+ "--dim_third",
70
+ type=int,
71
+ default=2,
72
+ help="The number of channels of the thrid layer",
73
+ )
74
+ ## network training
75
+ parser.add_argument(
76
+ "--batch_size", type=int, default=16, help="The batch size for training."
77
+ )
78
+ parser.add_argument(
79
+ "--num_epochs", type=int, default=20, help="Number of epochs for training."
80
+ )
81
+ ## Loss function
82
+ parser.add_argument(
83
+ "--init_initial_learning_rate",
84
+ type=float,
85
+ default=1e-3,
86
+ help="The init initial learning rate value.",
87
+ )
88
+ parser.add_argument(
89
+ "--MSIP_sizes", type=str, default="8,16,24,32,40", help="MSIP sizes."
90
+ )
91
+ parser.add_argument(
92
+ "--MSIP_factor_loss",
93
+ type=str,
94
+ default="256.0,64.0,16.0,4.0,1.0",
95
+ help="MSIP loss balancing parameters.",
96
+ )
97
+ parser.add_argument("--ori_loss_balance", type=float, default=100.0, help="")
98
+ ## Dataset generation
99
+ parser.add_argument(
100
+ "--patch_size",
101
+ type=int,
102
+ default=192,
103
+ help="The patch size of the generated dataset.",
104
+ )
105
+ parser.add_argument(
106
+ "--max_angle",
107
+ type=int,
108
+ default=180,
109
+ help="The max angle value for generating a synthetic view to train REKD.",
110
+ )
111
+ parser.add_argument(
112
+ "--min_scale",
113
+ type=float,
114
+ default=1.0,
115
+ help="The min scale value for generating a synthetic view to train REKD.",
116
+ )
117
+ parser.add_argument(
118
+ "--max_scale",
119
+ type=float,
120
+ default=1.0,
121
+ help="The max scale value for generating a synthetic view to train REKD.",
122
+ )
123
+ parser.add_argument(
124
+ "--max_shearing",
125
+ type=float,
126
+ default=0.0,
127
+ help="The max shearing value for generating a synthetic view to train REKD.",
128
+ )
129
+ parser.add_argument(
130
+ "--num_training_data",
131
+ type=int,
132
+ default=9000,
133
+ help="The number of the generated dataset.",
134
+ )
135
+ parser.add_argument(
136
+ "--is_debugging",
137
+ type=bool,
138
+ default=False,
139
+ help="Set variable to True if you desire to train network on a smaller dataset.",
140
+ )
141
+ ## For eval/inference
142
+ parser.add_argument(
143
+ "--num_points",
144
+ type=int,
145
+ default=1500,
146
+ help="the number of points at evaluation time.",
147
+ )
148
+ parser.add_argument(
149
+ "--pyramid_levels", type=int, default=5, help="downsampling pyramid levels."
150
+ )
151
+ parser.add_argument(
152
+ "--upsampled_levels", type=int, default=2, help="upsampling image levels."
153
+ )
154
+ parser.add_argument(
155
+ "--nms_size",
156
+ type=int,
157
+ default=15,
158
+ help="The NMS size for computing the validation repeatability.",
159
+ )
160
+ parser.add_argument(
161
+ "--border_size",
162
+ type=int,
163
+ default=15,
164
+ help="The number of pixels to remove from the borders to compute the repeatability.",
165
+ )
166
+ ## For HPatches evaluation
167
+ parser.add_argument(
168
+ "--hpatches_path",
169
+ type=str,
170
+ default="./datasets/hpatches-sequences-release",
171
+ help="dataset ",
172
+ )
173
+ parser.add_argument(
174
+ "--eval_split",
175
+ type=str,
176
+ default="debug",
177
+ help="debug, view, illum, full, debug_view, debug_illum ...",
178
+ )
179
+ parser.add_argument(
180
+ "--descriptor", type=str, default="hardnet", help="hardnet, sosnet, hynet"
181
+ )
182
+
183
+ args, weird_args = (
184
+ parser.parse_known_args() if not jupyter else parser.parse_args(args=[])
185
+ )
186
+
187
+ fix_randseed(12345)
188
+
189
+ if args.synth_dir == "":
190
+ args.synth_dir = "datasets/synth_data"
191
+
192
+ args.MSIP_sizes = [int(i) for i in args.MSIP_sizes.split(",")]
193
+ args.MSIP_factor_loss = [float(i) for i in args.MSIP_factor_loss.split(",")]
194
+
195
+ return args
196
+
197
+
198
+ def fix_randseed(randseed):
199
+ r"""Fix random seed"""
200
+ random.seed(randseed)
201
+ numpy.random.seed(randseed)
202
+ torch.manual_seed(randseed)
203
+ torch.cuda.manual_seed(randseed)
204
+ torch.cuda.manual_seed_all(randseed)
205
+ torch.backends.cudnn.benchmark, torch.backends.cudnn.deterministic = False, True
206
+ # torch.backends.cudnn.benchmark, torch.backends.cudnn.deterministic = True, False
imcui/third_party/dad/dad/detectors/third_party/rekd/geometry_tools.py ADDED
@@ -0,0 +1,204 @@
1
+ from cv2 import warpPerspective as applyH
2
+ import numpy as np
3
+ import torch
4
+
5
+
6
+ def apply_nms(score_map, size):
7
+ from scipy.ndimage.filters import maximum_filter
8
+
9
+ score_map = score_map * (
10
+ score_map == maximum_filter(score_map, footprint=np.ones((size, size)))
11
+ )
12
+ return score_map
13
+
14
+
15
+ def remove_borders(images, borders):
16
+ ## input [B,C,H,W]
17
+ shape = images.shape
18
+
19
+ if len(shape) == 4:
20
+ for batch_id in range(shape[0]):
21
+ images[batch_id, :, 0:borders, :] = 0
22
+ images[batch_id, :, :, 0:borders] = 0
23
+ images[batch_id, :, shape[2] - borders : shape[2], :] = 0
24
+ images[batch_id, :, :, shape[3] - borders : shape[3]] = 0
25
+ elif len(shape) == 2:
26
+ images[0:borders, :] = 0
27
+ images[:, 0:borders] = 0
28
+ images[shape[0] - borders : shape[0], :] = 0
29
+ images[:, shape[1] - borders : shape[1]] = 0
30
+ else:
31
+ print("Not implemented")
32
+ exit()
33
+
34
+ return images
35
+
36
+
37
+ def create_common_region_masks(h_dst_2_src, shape_src, shape_dst):
38
+ # Create mask. Only take into account pixels in the two images
39
+ inv_h = np.linalg.inv(h_dst_2_src)
40
+ inv_h = inv_h / inv_h[2, 2]
41
+
42
+ # Apply the mask to the destination. Where there is no 1, we cannot find a corresponding point in the source.
43
+ ones_dst = np.ones((shape_dst[0], shape_dst[1]))
44
+ ones_dst = remove_borders(ones_dst, borders=15)
45
+ mask_src = applyH(ones_dst, h_dst_2_src, (shape_src[1], shape_src[0]))
46
+ mask_src = np.where(mask_src >= 0.75, 1.0, 0.0)
47
+ mask_src = remove_borders(mask_src, borders=15)
48
+
49
+ ones_src = np.ones((shape_src[0], shape_src[1]))
50
+ ones_src = remove_borders(ones_src, borders=15)
51
+ mask_dst = applyH(ones_src, inv_h, (shape_dst[1], shape_dst[0]))
52
+ mask_dst = np.where(mask_dst >= 0.75, 1.0, 0.0)
53
+ mask_dst = remove_borders(mask_dst, borders=15)
54
+
55
+ return mask_src, mask_dst
56
+
57
+
58
+ def prepare_homography(hom):
59
+ if len(hom.shape) == 1:
60
+ h = np.zeros((3, 3))
61
+ for j in range(3):
62
+ for i in range(3):
63
+ if j == 2 and i == 2:
64
+ h[j, i] = 1.0
65
+ else:
66
+ h[j, i] = hom[j * 3 + i]
67
+ elif len(hom.shape) == 2: ## batch
68
+ ones = torch.ones(hom.shape[0]).unsqueeze(1)
69
+ h = torch.cat([hom, ones], dim=1).reshape(-1, 3, 3).type(torch.float32)
70
+
71
+ return h
72
+
73
+
74
+ def getAff(x, y, H):
75
+ h11 = H[0, 0]
76
+ h12 = H[0, 1]
77
+ h13 = H[0, 2]
78
+ h21 = H[1, 0]
79
+ h22 = H[1, 1]
80
+ h23 = H[1, 2]
81
+ h31 = H[2, 0]
82
+ h32 = H[2, 1]
83
+ h33 = H[2, 2]
84
+ fxdx = (
85
+ h11 / (h31 * x + h32 * y + h33)
86
+ - (h11 * x + h12 * y + h13) * h31 / (h31 * x + h32 * y + h33) ** 2
87
+ )
88
+ fxdy = (
89
+ h12 / (h31 * x + h32 * y + h33)
90
+ - (h11 * x + h12 * y + h13) * h32 / (h31 * x + h32 * y + h33) ** 2
91
+ )
92
+
93
+ fydx = (
94
+ h21 / (h31 * x + h32 * y + h33)
95
+ - (h21 * x + h22 * y + h23) * h31 / (h31 * x + h32 * y + h33) ** 2
96
+ )
97
+ fydy = (
98
+ h22 / (h31 * x + h32 * y + h33)
99
+ - (h21 * x + h22 * y + h23) * h32 / (h31 * x + h32 * y + h33) ** 2
100
+ )
101
+
102
+ Aff = [[fxdx, fxdy], [fydx, fydy]]
103
+
104
+ return np.asarray(Aff)
105
+
106
+
107
+ def apply_homography_to_points(points, h):
108
+ new_points = []
109
+
110
+ for point in points:
111
+ new_point = h.dot([point[0], point[1], 1.0])
112
+
113
+ tmp = point[2] ** 2 + np.finfo(np.float32).eps
114
+
115
+ Mi1 = [[1 / tmp, 0], [0, 1 / tmp]]
116
+ Mi1_inv = np.linalg.inv(Mi1)
117
+ Aff = getAff(point[0], point[1], h)
118
+
119
+ BMB = np.linalg.inv(np.dot(Aff, np.dot(Mi1_inv, np.matrix.transpose(Aff))))
120
+
121
+ [e, _] = np.linalg.eig(BMB)
122
+ new_radious = 1 / ((e[0] * e[1]) ** 0.5) ** 0.5
123
+
124
+ new_point = [
125
+ new_point[0] / new_point[2],
126
+ new_point[1] / new_point[2],
127
+ new_radious,
128
+ point[3],
129
+ ]
130
+ new_points.append(new_point)
131
+
132
+ return np.asarray(new_points)
133
+
134
+
135
+ def find_index_higher_scores(map, num_points=1000, threshold=-1):
136
+ # Best n points
137
+ if threshold == -1:
138
+ flatten = map.flatten()
139
+ order_array = np.sort(flatten)
140
+
141
+ order_array = np.flip(order_array, axis=0)
142
+
143
+ if order_array.shape[0] < num_points:
144
+ num_points = order_array.shape[0]
145
+
146
+ threshold = order_array[num_points - 1]
147
+
148
+ if threshold <= 0.0:
149
+ ### This is the problem case, which yields fewer keypoints than the requested "num_points".
150
+ indexes = np.argwhere(order_array > 0.0)
151
+
152
+ if len(indexes) == 0:
153
+ threshold = 0.0
154
+ else:
155
+ threshold = order_array[indexes[len(indexes) - 1]]
156
+
157
+ indexes = np.argwhere(map >= threshold)
158
+
159
+ return indexes[:num_points]
160
+
161
+
162
+ def get_point_coordinates(
163
+ map, scale_value=1.0, num_points=1000, threshold=-1, order_coord="xysr"
164
+ ):
165
+ ## input numpy array score map : [H, W]
166
+ indexes = find_index_higher_scores(map, num_points=num_points, threshold=threshold)
167
+ new_indexes = []
168
+ for ind in indexes:
169
+ scores = map[ind[0], ind[1]]
170
+ if order_coord == "xysr":
171
+ tmp = [ind[1], ind[0], scale_value, scores]
172
+ elif order_coord == "yxsr":
173
+ tmp = [ind[0], ind[1], scale_value, scores]
174
+
175
+ new_indexes.append(tmp)
176
+
177
+ indexes = np.asarray(new_indexes)
178
+
179
+ return np.asarray(indexes)
180
+
181
+
182
+ def get_point_coordinates3D(
183
+ map,
184
+ scale_factor=1.0,
185
+ up_levels=0,
186
+ num_points=1000,
187
+ threshold=-1,
188
+ order_coord="xysr",
189
+ ):
190
+ indexes = find_index_higher_scores(map, num_points=num_points, threshold=threshold)
191
+ new_indexes = []
192
+ for ind in indexes:
193
+ scale_value = scale_factor ** (ind[2] - up_levels)
194
+ scores = map[ind[0], ind[1], ind[2]]
195
+ if order_coord == "xysr":
196
+ tmp = [ind[1], ind[0], scale_value, scores]
197
+ elif order_coord == "yxsr":
198
+ tmp = [ind[0], ind[1], scale_value, scores]
199
+
200
+ new_indexes.append(tmp)
201
+
202
+ indexes = np.asarray(new_indexes)
203
+
204
+ return np.asarray(indexes)
imcui/third_party/dad/dad/detectors/third_party/rekd/model/REKD.py ADDED
@@ -0,0 +1,234 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+
4
+
5
+ from .kernels import gaussian_multiple_channels
6
+
7
+
8
+ class REKD(torch.nn.Module):
9
+ def __init__(self, args, device):
10
+ super(REKD, self).__init__()
11
+ from e2cnn import gspaces
12
+ from e2cnn import nn
13
+
14
+ self.pyramid_levels = 3
15
+ self.factor_scaling = args.factor_scaling_pyramid
16
+
17
+ # Smooth Gausian Filter
18
+ num_channels = 1 ## gray scale image
19
+ self.gaussian_avg = gaussian_multiple_channels(num_channels, 1.5)
20
+
21
+ r2_act = gspaces.Rot2dOnR2(N=args.group_size)
22
+
23
+ self.feat_type_in = nn.FieldType(
24
+ r2_act, num_channels * [r2_act.trivial_repr]
25
+ ) ## input 1 channels (gray scale image)
26
+
27
+ feat_type_out1 = nn.FieldType(r2_act, args.dim_first * [r2_act.regular_repr])
28
+ feat_type_out2 = nn.FieldType(r2_act, args.dim_second * [r2_act.regular_repr])
29
+ feat_type_out3 = nn.FieldType(r2_act, args.dim_third * [r2_act.regular_repr])
30
+
31
+ feat_type_ori_est = nn.FieldType(r2_act, [r2_act.regular_repr])
32
+
33
+ self.block1 = nn.SequentialModule(
34
+ nn.R2Conv(
35
+ self.feat_type_in, feat_type_out1, kernel_size=5, padding=2, bias=False
36
+ ),
37
+ nn.InnerBatchNorm(feat_type_out1),
38
+ nn.ReLU(feat_type_out1, inplace=True),
39
+ )
40
+ self.block2 = nn.SequentialModule(
41
+ nn.R2Conv(
42
+ feat_type_out1, feat_type_out2, kernel_size=5, padding=2, bias=False
43
+ ),
44
+ nn.InnerBatchNorm(feat_type_out2),
45
+ nn.ReLU(feat_type_out2, inplace=True),
46
+ )
47
+ self.block3 = nn.SequentialModule(
48
+ nn.R2Conv(
49
+ feat_type_out2, feat_type_out3, kernel_size=5, padding=2, bias=False
50
+ ),
51
+ nn.InnerBatchNorm(feat_type_out3),
52
+ nn.ReLU(feat_type_out3, inplace=True),
53
+ )
54
+
55
+ self.ori_learner = nn.SequentialModule(
56
+ nn.R2Conv(
57
+ feat_type_out3, feat_type_ori_est, kernel_size=1, padding=0, bias=False
58
+ ) ## Channel pooling by 8*G -> 1*G conv.
59
+ )
60
+ self.softmax = torch.nn.Softmax(dim=1)
61
+
62
+ self.gpool = nn.GroupPooling(feat_type_out3)
63
+ self.last_layer_learner = torch.nn.Sequential(
64
+ torch.nn.BatchNorm2d(num_features=args.dim_third * self.pyramid_levels),
65
+ torch.nn.Conv2d(
66
+ in_channels=args.dim_third * self.pyramid_levels,
67
+ out_channels=1,
68
+ kernel_size=1,
69
+ bias=True,
70
+ ),
71
+ torch.nn.ReLU(inplace=True), ## clamp to make the scores positive values.
72
+ )
73
+
74
+ self.dim_third = args.dim_third
75
+ self.group_size = args.group_size
76
+ self.exported = False
77
+
78
+ def export(self):
79
+ from e2cnn import nn
80
+
81
+ for name, module in dict(self.named_modules()).copy().items():
82
+ if isinstance(module, nn.EquivariantModule):
83
+ # print(name, "--->", module)
84
+ module = module.export()
85
+ setattr(self, name, module)
86
+
87
+ self.exported = True
88
+
89
+ def forward(self, input_data):
90
+ features_key, features_o = self.compute_features(input_data)
91
+
92
+ return features_key, features_o
93
+
94
+ def compute_features(self, input_data):
95
+ B, _, H, W = input_data.shape
96
+
97
+ for idx_level in range(self.pyramid_levels):
98
+ with torch.no_grad():
99
+ input_data_resized = self._resize_input_image(
100
+ input_data, idx_level, H, W
101
+ )
102
+
103
+ if H > 2500 or W > 2500:
104
+ features_t, features_o = self._forwarding_networks_divide_grid(
105
+ input_data_resized
106
+ )
107
+ else:
108
+ features_t, features_o = self._forwarding_networks(input_data_resized)
109
+
110
+ features_t = F.interpolate(
111
+ features_t, size=(H, W), align_corners=True, mode="bilinear"
112
+ )
113
+ features_o = F.interpolate(
114
+ features_o, size=(H, W), align_corners=True, mode="bilinear"
115
+ )
116
+
117
+ if idx_level == 0:
118
+ features_key = features_t
119
+ features_ori = features_o
120
+ else:
121
+ features_key = torch.cat([features_key, features_t], axis=1)
122
+ features_ori = torch.add(features_ori, features_o)
123
+
124
+ features_key = self.last_layer_learner(features_key)
125
+ features_ori = self.softmax(features_ori)
126
+
127
+ return features_key, features_ori
128
+
129
+ def _forwarding_networks(self, input_data_resized):
130
+ from e2cnn import nn
131
+
132
+ # wrap the input tensor in a GeometricTensor (associate it with the input type)
133
+ features_t = (
134
+ nn.GeometricTensor(input_data_resized, self.feat_type_in)
135
+ if not self.exported
136
+ else input_data_resized
137
+ )
138
+
139
+ ## Geometric tensor feed forwarding
140
+ features_t = self.block1(features_t)
141
+ features_t = self.block2(features_t)
142
+ features_t = self.block3(features_t)
143
+
144
+ ## orientation pooling
145
+ features_o = self.ori_learner(features_t) ## self.cpool
146
+ features_o = features_o.tensor if not self.exported else features_o
147
+
148
+ ## keypoint pooling
149
+ features_t = self.gpool(features_t)
150
+ features_t = features_t.tensor if not self.exported else features_t
151
+
152
+ return features_t, features_o
153
+
154
+ def _forwarding_networks_divide_grid(self, input_data_resized):
155
+ ## for inference time high resolution image. # spatial grid 4
156
+ B, _, H_resized, W_resized = input_data_resized.shape
157
+ features_t = torch.zeros(B, self.dim_third, H_resized, W_resized).cuda()
158
+ features_o = torch.zeros(B, self.group_size, H_resized, W_resized).cuda()
159
+ h_divide = 2
160
+ w_divide = 2
161
+ for idx in range(h_divide):
162
+ for jdx in range(w_divide):
163
+ ## compute the start and end spatial index
164
+ h_start = H_resized // h_divide * idx
165
+ w_start = W_resized // w_divide * jdx
166
+ h_end = H_resized // h_divide * (idx + 1)
167
+ w_end = W_resized // w_divide * (jdx + 1)
168
+ ## crop the input image
169
+ input_data_divided = input_data_resized[
170
+ :, :, h_start:h_end, w_start:w_end
171
+ ]
172
+ features_t_temp, features_o_temp = self._forwarding_networks(
173
+ input_data_divided
174
+ )
175
+ ## take into the values.
176
+ features_t[:, :, h_start:h_end, w_start:w_end] = features_t_temp
177
+ features_o[:, :, h_start:h_end, w_start:w_end] = features_o_temp
178
+
179
+ return features_t, features_o
180
+
181
+ def _resize_input_image(self, input_data, idx_level, H, W):
182
+ if idx_level == 0:
183
+ input_data_smooth = input_data
184
+ else:
185
+ ## (7,7) size gaussian kernel.
186
+ input_data_smooth = F.conv2d(
187
+ input_data, self.gaussian_avg.to(input_data.device), padding=[3, 3]
188
+ )
189
+
190
+ target_resize = (
191
+ int(H / (self.factor_scaling**idx_level)),
192
+ int(W / (self.factor_scaling**idx_level)),
193
+ )
194
+
195
+ input_data_resized = F.interpolate(
196
+ input_data_smooth, size=target_resize, align_corners=True, mode="bilinear"
197
+ )
198
+
199
+ input_data_resized = self.local_norm_image(input_data_resized)
200
+
201
+ return input_data_resized
202
+
203
+ def local_norm_image(self, x, k_size=65, eps=1e-10):
204
+ pad = int(k_size / 2)
205
+
206
+ x_pad = F.pad(x, (pad, pad, pad, pad), mode="reflect")
207
+ x_mean = F.avg_pool2d(
208
+ x_pad, kernel_size=[k_size, k_size], stride=[1, 1], padding=0
209
+ ) ## padding='valid'==0
210
+ x2_mean = F.avg_pool2d(
211
+ torch.pow(x_pad, 2.0),
212
+ kernel_size=[k_size, k_size],
213
+ stride=[1, 1],
214
+ padding=0,
215
+ )
216
+
217
+ x_std = torch.sqrt(torch.abs(x2_mean - x_mean * x_mean)) + eps
218
+ x_norm = (x - x_mean) / (1.0 + x_std)
219
+
220
+ return x_norm
221
+
222
+
223
+ def count_model_parameters(model):
224
+ ## Count the number of learnable parameters.
225
+ print("================ List of Learnable model parameters ================ ")
226
+ for n, p in model.named_parameters():
227
+ if p.requires_grad:
228
+ print("{} {}".format(n, p.data.shape))
229
+ else:
230
+ print("\n\n\n None learnable params {} {}".format(n, p.data.shape))
231
+ model_parameters = filter(lambda p: p.requires_grad, model.parameters())
232
+ params = sum([torch.prod(torch.tensor(p.size())) for p in model_parameters])
233
+ print("The number of learnable parameters : {} ".format(params.data))
234
+ print("==================================================================== ")
imcui/third_party/dad/dad/detectors/third_party/rekd/model/kernels.py ADDED
@@ -0,0 +1,118 @@
1
+ import math
2
+ import torch
3
+
4
+
5
+ def gaussian_multiple_channels(num_channels, sigma):
6
+ r = 2 * sigma
7
+ size = 2 * r + 1
8
+ size = int(math.ceil(size))
9
+ x = torch.arange(0, size, 1, dtype=torch.float)
10
+ y = x.unsqueeze(1)
11
+ x0 = y0 = r
12
+
13
+ gaussian = torch.exp(-1 * (((x - x0) ** 2 + (y - y0) ** 2) / (2 * (sigma**2)))) / (
14
+ (2 * math.pi * (sigma**2)) ** 0.5
15
+ )
16
+ gaussian = gaussian.to(dtype=torch.float32)
17
+
18
+ weights = torch.zeros((num_channels, num_channels, size, size), dtype=torch.float32)
19
+ for i in range(num_channels):
20
+ weights[i, i, :, :] = gaussian
21
+
22
+ return weights
23
+
24
+
25
+ def ones_multiple_channels(size, num_channels):
26
+ ones = torch.ones((size, size))
27
+ weights = torch.zeros((num_channels, num_channels, size, size), dtype=torch.float32)
28
+
29
+ for i in range(num_channels):
30
+ weights[i, i, :, :] = ones
31
+
32
+ return weights
33
+
34
+
35
+ def grid_indexes(size):
36
+ weights = torch.zeros((2, 1, size, size), dtype=torch.float32)
37
+
38
+ columns = []
39
+ for idx in range(1, 1 + size):
40
+ columns.append(torch.ones((size)) * idx)
41
+ columns = torch.stack(columns)
42
+
43
+ rows = []
44
+ for idx in range(1, 1 + size):
45
+ rows.append(torch.tensor(range(1, 1 + size)))
46
+ rows = torch.stack(rows)
47
+
48
+ weights[0, 0, :, :] = columns
49
+ weights[1, 0, :, :] = rows
50
+
51
+ return weights
52
+
53
+
54
+ def get_kernel_size(factor):
55
+ """
56
+ Find the kernel size given the desired factor of upsampling.
57
+ """
58
+ return 2 * factor - factor % 2
59
+
60
+
61
+ def linear_upsample_weights(half_factor, number_of_classes):
62
+ """
63
+ Create weights matrix for transposed convolution with linear filter
64
+ initialization.
65
+ """
66
+
67
+ filter_size = get_kernel_size(half_factor)
68
+
69
+ weights = torch.zeros(
70
+ (
71
+ number_of_classes,
72
+ number_of_classes,
73
+ filter_size,
74
+ filter_size,
75
+ ),
76
+ dtype=torch.float32,
77
+ )
78
+
79
+ upsample_kernel = torch.ones((filter_size, filter_size))
80
+ for i in range(number_of_classes):
81
+ weights[i, i, :, :] = upsample_kernel
82
+
83
+ return weights
84
+
85
+
86
+ class Kernels_custom:
87
+ def __init__(self, args, MSIP_sizes=[]):
88
+ self.batch_size = args.batch_size
89
+ # create_kernels
90
+ self.kernels = {}
91
+
92
+ if MSIP_sizes != []:
93
+ self.create_kernels(MSIP_sizes)
94
+
95
+ if 8 not in MSIP_sizes:
96
+ self.create_kernels([8])
97
+
98
+ def create_kernels(self, MSIP_sizes):
99
+ # Grid Indexes for MSIP
100
+ for ksize in MSIP_sizes:
101
+ ones_kernel = ones_multiple_channels(ksize, 1)
102
+ indexes_kernel = grid_indexes(ksize)
103
+ upsample_filter_np = linear_upsample_weights(int(ksize / 2), 1)
104
+
105
+ self.ones_kernel = ones_kernel.requires_grad_(False)
106
+ self.kernels["ones_kernel_" + str(ksize)] = self.ones_kernel
107
+
108
+ self.upsample_filter_np = upsample_filter_np.requires_grad_(False)
109
+ self.kernels["upsample_filter_np_" + str(ksize)] = self.upsample_filter_np
110
+
111
+ self.indexes_kernel = indexes_kernel.requires_grad_(False)
112
+ self.kernels["indexes_kernel_" + str(ksize)] = self.indexes_kernel
113
+
114
+ def get_kernels(self, device):
115
+ kernels = {}
116
+ for k, v in self.kernels.items():
117
+ kernels[k] = v.to(device)
118
+ return kernels
imcui/third_party/dad/dad/detectors/third_party/rekd/model/load_models.py ADDED
@@ -0,0 +1,25 @@
1
+ import torch
2
+ from .REKD import REKD
3
+
4
+
5
+ def load_detector(args, device):
6
+ args.group_size, args.dim_first, args.dim_second, args.dim_third = model_parsing(
7
+ args
8
+ )
9
+ model1 = REKD(args, device)
10
+ model1.load_state_dict(torch.load(args.load_dir, weights_only=True))
11
+ model1.export()
12
+ model1.eval()
13
+ model1.to(device) ## use GPU
14
+
15
+ return model1
16
+
17
+
18
+ ## Load our model
19
+ def model_parsing(args):
20
+ group_size = args.load_dir.split("_group")[1].split("_")[0]
21
+ dim_first = args.load_dir.split("_f")[1].split("_")[0]
22
+ dim_second = args.load_dir.split("_s")[1].split("_")[0]
23
+ dim_third = args.load_dir.split("_t")[1].split(".log")[0]
24
+
25
+ return int(group_size), int(dim_first), int(dim_second), int(dim_third)
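
A hedged sketch of driving `load_detector` above: `model_parsing` recovers the group size and layer widths from the checkpoint filename, so the filename below is hypothetical and merely chosen to match the `"_group<G>_f<A>_s<B>_t<C>.log"` pattern its string splitting expects. REKD additionally requires the e2cnn package and a checkpoint on disk.

```python
# Sketch only: the install path and checkpoint filename are assumptions for illustration.
from types import SimpleNamespace
import torch
from dad.detectors.third_party.rekd.model.load_models import load_detector  # assumed path

args = SimpleNamespace(
    load_dir="trained_models/weights/rekd_group36_f2_s2_t2.log",  # hypothetical checkpoint
    factor_scaling_pyramid=1.2,
    group_size=36, dim_first=2, dim_second=2, dim_third=2,        # overwritten by model_parsing()
)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = load_detector(args, device)   # loads REKD weights, exports e2cnn modules, sets eval mode
```
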