Realcat committed on
Commit 45fa4b6 · 1 Parent(s): 09c4be0

update: api

imcui/api/__init__.py CHANGED
@@ -1,47 +1,47 @@
+ import base64
+ import io
+ from typing import List
+
+ import numpy as np
+ from fastapi.exceptions import HTTPException
+ from PIL import Image
+ from pydantic import BaseModel
+
+ from ..hloc import logger
+ from .core import ImageMatchingAPI
+
+
+ class ImagesInput(BaseModel):
+     data: List[str] = []
+     max_keypoints: List[int] = []
+     timestamps: List[str] = []
+     grayscale: bool = False
+     image_hw: List[List[int]] = [[], []]
+     feature_type: int = 0
+     rotates: List[float] = []
+     scales: List[float] = []
+     reference_points: List[List[float]] = []
+     binarize: bool = False
+
+
+ def decode_base64_to_image(encoding):
+     if encoding.startswith("data:image/"):
+         encoding = encoding.split(";")[1].split(",")[1]
+     try:
+         image = Image.open(io.BytesIO(base64.b64decode(encoding)))
+         return image
+     except Exception as e:
+         logger.warning(f"API cannot decode image: {e}")
+         raise HTTPException(status_code=500, detail="Invalid encoded image") from e
+
+
+ def to_base64_nparray(encoding: str) -> np.ndarray:
+     return np.array(decode_base64_to_image(encoding)).astype("uint8")
+
+
+ __all__ = [
+     "ImageMatchingAPI",
+     "ImagesInput",
+     "decode_base64_to_image",
+     "to_base64_nparray",
+ ]
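For reference, a minimal round-trip of the helpers above (a sketch, not part of this commit; it assumes the package is importable as `imcui.api` and generates a throwaway image rather than reading one from disk):

import base64
import io

import numpy as np
from PIL import Image

from imcui.api import decode_base64_to_image, to_base64_nparray

# Build a small grayscale test image and base64-encode it as PNG,
# mirroring what a client would place in ImagesInput.data.
img = Image.fromarray(np.random.randint(0, 255, (32, 32), dtype=np.uint8))
buf = io.BytesIO()
img.save(buf, format="PNG")
b64 = base64.b64encode(buf.getvalue()).decode("utf-8")

decoded = decode_base64_to_image(b64)  # PIL.Image
array = to_base64_nparray(b64)         # np.ndarray of dtype uint8
assert array.shape == (32, 32)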
imcui/api/client.py CHANGED
@@ -1,232 +1,232 @@
1
+ import argparse
2
+ import base64
3
+ import os
4
+ import pickle
5
+ import time
6
+ from typing import Dict, List
7
+
8
+ import cv2
9
+ import numpy as np
10
+ import requests
11
+
12
+ ENDPOINT = "http://127.0.0.1:8001"
13
+ if "REMOTE_URL_RAILWAY" in os.environ:
14
+ ENDPOINT = os.environ["REMOTE_URL_RAILWAY"]
15
+
16
+ print(f"API ENDPOINT: {ENDPOINT}")
17
+
18
+ API_VERSION = f"{ENDPOINT}/version"
19
+ API_URL_MATCH = f"{ENDPOINT}/v1/match"
20
+ API_URL_EXTRACT = f"{ENDPOINT}/v1/extract"
21
+
22
+
23
+ def read_image(path: str) -> str:
24
+ """
25
+ Read an image from a file, encode it as a JPEG and then as a base64 string.
26
+
27
+ Args:
28
+ path (str): The path to the image to read.
29
+
30
+ Returns:
31
+ str: The base64 encoded image.
32
+ """
33
+ # Read the image from the file
34
+ img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
35
+
36
+ # Encode the image as a png, NO COMPRESSION!!!
37
+ retval, buffer = cv2.imencode(".png", img)
38
+
39
+ # Encode the JPEG as a base64 string
40
+ b64img = base64.b64encode(buffer).decode("utf-8")
41
+
42
+ return b64img
43
+
44
+
45
+ def do_api_requests(url=API_URL_EXTRACT, **kwargs):
46
+ """
47
+ Helper function to send an API request to the image matching service.
48
+
49
+ Args:
50
+ url (str): The URL of the API endpoint to use. Defaults to the
51
+ feature extraction endpoint.
52
+ **kwargs: Additional keyword arguments to pass to the API.
53
+
54
+ Returns:
55
+ List[Dict[str, np.ndarray]]: A list of dictionaries containing the
56
+ extracted features. The keys are "keypoints", "descriptors", and
57
+ "scores", and the values are ndarrays of shape (N, 2), (N, ?),
58
+ and (N,), respectively.
59
+ """
60
+ # Set up the request body
61
+ reqbody = {
62
+ # List of image data base64 encoded
63
+ "data": [],
64
+ # List of maximum number of keypoints to extract from each image
65
+ "max_keypoints": [100, 100],
66
+ # List of timestamps for each image (not used?)
67
+ "timestamps": ["0", "1"],
68
+ # Whether to convert the images to grayscale
69
+ "grayscale": 0,
70
+ # List of image height and width
71
+ "image_hw": [[640, 480], [320, 240]],
72
+ # Type of feature to extract
73
+ "feature_type": 0,
74
+ # List of rotation angles for each image
75
+ "rotates": [0.0, 0.0],
76
+ # List of scale factors for each image
77
+ "scales": [1.0, 1.0],
78
+ # List of reference points for each image (not used)
79
+ "reference_points": [[640, 480], [320, 240]],
80
+ # Whether to binarize the descriptors
81
+ "binarize": True,
82
+ }
83
+ # Update the request body with the additional keyword arguments
84
+ reqbody.update(kwargs)
85
+ try:
86
+ # Send the request
87
+ r = requests.post(url, json=reqbody)
88
+ if r.status_code == 200:
89
+ # Return the response
90
+ return r.json()
91
+ else:
92
+ # Print an error message if the response code is not 200
93
+ print(f"Error: Response code {r.status_code} - {r.text}")
94
+ except Exception as e:
95
+ # Print an error message if an exception occurs
96
+ print(f"An error occurred: {e}")
97
+
98
+
99
+ def send_request_match(path0: str, path1: str) -> Dict[str, np.ndarray]:
100
+ """
101
+ Send a request to the API to generate a match between two images.
102
+
103
+ Args:
104
+ path0 (str): The path to the first image.
105
+ path1 (str): The path to the second image.
106
+
107
+ Returns:
108
+ Dict[str, np.ndarray]: A dictionary containing the generated matches.
109
+ The keys are "keypoints0", "keypoints1", "matches0", and "matches1",
110
+ and the values are ndarrays of shape (N, 2), (N, 2), (N, 2), and
111
+ (N, 2), respectively.
112
+ """
113
+ files = {"image0": open(path0, "rb"), "image1": open(path1, "rb")}
114
+ try:
115
+ # TODO: replace files with post json
116
+ response = requests.post(API_URL_MATCH, files=files)
117
+ pred = {}
118
+ if response.status_code == 200:
119
+ pred = response.json()
120
+ for key in list(pred.keys()):
121
+ pred[key] = np.array(pred[key])
122
+ else:
123
+ print(f"Error: Response code {response.status_code} - {response.text}")
124
+ finally:
125
+ files["image0"].close()
126
+ files["image1"].close()
127
+ return pred
128
+
129
+
130
+ def send_request_extract(
131
+ input_images: str, viz: bool = False
132
+ ) -> List[Dict[str, np.ndarray]]:
133
+ """
134
+ Send a request to the API to extract features from an image.
135
+
136
+ Args:
137
+ input_images (str): The path to the image.
138
+
139
+ Returns:
140
+ List[Dict[str, np.ndarray]]: A list of dictionaries containing the
141
+ extracted features. The keys are "keypoints", "descriptors", and
142
+ "scores", and the values are ndarrays of shape (N, 2), (N, 128),
143
+ and (N,), respectively.
144
+ """
145
+ image_data = read_image(input_images)
146
+ inputs = {
147
+ "data": [image_data],
148
+ }
149
+ response = do_api_requests(
150
+ url=API_URL_EXTRACT,
151
+ **inputs,
152
+ )
153
+ # breakpoint()
154
+ # print("Keypoints detected: {}".format(len(response[0]["keypoints"])))
155
+
156
+ # draw matching, debug only
157
+ if viz:
158
+ from hloc.utils.viz import plot_keypoints
159
+ from ui.viz import fig2im, plot_images
160
+
161
+ kpts = np.array(response[0]["keypoints_orig"])
162
+ if "image_orig" in response[0].keys():
163
+ img_orig = np.array(["image_orig"])
164
+
165
+ output_keypoints = plot_images([img_orig], titles="titles", dpi=300)
166
+ plot_keypoints([kpts])
167
+ output_keypoints = fig2im(output_keypoints)
168
+ cv2.imwrite(
169
+ "demo_match.jpg",
170
+ output_keypoints[:, :, ::-1].copy(), # RGB -> BGR
171
+ )
172
+ return response
173
+
174
+
175
+ def get_api_version():
176
+ try:
177
+ response = requests.get(API_VERSION).json()
178
+ print("API VERSION: {}".format(response["version"]))
179
+ except Exception as e:
180
+ print(f"An error occurred: {e}")
181
+
182
+
183
+ if __name__ == "__main__":
184
+ from pathlib import Path
185
+
186
+ parser = argparse.ArgumentParser(
187
+ description="Send text to stable audio server and receive generated audio."
188
+ )
189
+ parser.add_argument(
190
+ "--image0",
191
+ required=False,
192
+ help="Path for the file's melody",
193
+ default=str(
194
+ Path(__file__).parents[1]
195
+ / "datasets/sacre_coeur/mapping_rot/02928139_3448003521_rot45.jpg"
196
+ ),
197
+ )
198
+ parser.add_argument(
199
+ "--image1",
200
+ required=False,
201
+ help="Path for the file's melody",
202
+ default=str(
203
+ Path(__file__).parents[1]
204
+ / "datasets/sacre_coeur/mapping_rot/02928139_3448003521_rot90.jpg"
205
+ ),
206
+ )
207
+ args = parser.parse_args()
208
+
209
+ # get api version
210
+ get_api_version()
211
+
212
+ # request match
213
+ # for i in range(10):
214
+ # t1 = time.time()
215
+ # preds = send_request_match(args.image0, args.image1)
216
+ # t2 = time.time()
217
+ # print(
218
+ # "Time cost1: {} seconds, matched: {}".format(
219
+ # (t2 - t1), len(preds["mmkeypoints0_orig"])
220
+ # )
221
+ # )
222
+
223
+ # request extract
224
+ for i in range(1000):
225
+ t1 = time.time()
226
+ preds = send_request_extract(args.image0)
227
+ t2 = time.time()
228
+ print(f"Time cost2: {(t2 - t1)} seconds")
229
+
230
+ # dump preds
231
+ with open("preds.pkl", "wb") as f:
232
+ pickle.dump(preds, f)
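The extraction endpoint returns plain JSON, so arrays come back as nested lists. A small client-side helper (a sketch under that assumption, not part of this commit) shows how to rebuild numpy arrays from a `send_request_extract` response; the key names follow the docstrings above:

import numpy as np

def response_to_arrays(pred: dict) -> dict:
    # Convert JSON lists (e.g. "keypoints", "keypoints_orig", "descriptors", "scores")
    # back into numpy arrays; non-list values are passed through unchanged.
    return {k: np.array(v) if isinstance(v, list) else v for k, v in pred.items()}

# preds = send_request_extract("path/to/image.jpg")
# kpts = response_to_arrays(preds[0])["keypoints"]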
imcui/api/config/api.yaml CHANGED
@@ -1,51 +1,35 @@
- # This file was generated using the `serve build` command on Ray v2.38.0.
-
- proxy_location: EveryNode
- http_options:
-   host: 0.0.0.0
-   port: 8001
-
- grpc_options:
-   port: 9000
-   grpc_servicer_functions: []
-
- logging_config:
-   encoding: TEXT
-   log_level: INFO
-   logs_dir: null
-   enable_access_log: true
-
- applications:
- - name: app1
-   route_prefix: /
-   import_path: api.server:service
-   runtime_env: {}
-   deployments:
-   - name: ImageMatchingService
-     num_replicas: 4
-     ray_actor_options:
-       num_cpus: 2.0
-       num_gpus: 1.0
-
- api:
-   feature:
-     output: feats-superpoint-n4096-rmax1600
-     model:
-       name: superpoint
-       nms_radius: 3
-       max_keypoints: 4096
-       keypoint_threshold: 0.005
-     preprocessing:
-       grayscale: True
-       force_resize: True
-       resize_max: 1600
-       width: 640
-       height: 480
-       dfactor: 8
-   matcher:
-     output: matches-NN-mutual
-     model:
-       name: nearest_neighbor
-       do_mutual_check: True
-       match_threshold: 0.2
-   dense: False

+ service:
+   num_replicas: 4
+   ray_actor_options:
+     num_cpus: 2.0
+     num_gpus: 1.0
+   host: &default_host
+     "0.0.0.0"
+   http_options:
+     host: *default_host
+     port: 8001
+   route_prefix: "/"
+   dashboard_port: 8265
+
+ api:
+   feature:
+     output: feats-superpoint-n4096-rmax1600
+     model:
+       name: superpoint
+       nms_radius: 3
+       max_keypoints: 4096
+       keypoint_threshold: 0.005
+     preprocessing:
+       grayscale: True
+       force_resize: True
+       resize_max: 1600
+       width: 640
+       height: 480
+       dfactor: 8
+   matcher:
+     output: matches-NN-mutual
+     model:
+       name: nearest_neighbor
+       do_mutual_check: True
+       match_threshold: 0.2
+   dense: False
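The new `service:` block is consumed at startup by `server.py` (shown below). A minimal sketch of that consumption, assuming `read_yaml` is a thin wrapper around `yaml.safe_load`:

from pathlib import Path

import yaml

def read_yaml(path: Path) -> dict:
    # Stand-in for imcui.hloc.utils.io.read_yaml used by server.py.
    with open(path, "r") as f:
        return yaml.safe_load(f)

config = read_yaml(Path("imcui/api/config/api.yaml"))
service_conf = config["service"]
num_replicas = service_conf.get("num_replicas", 4)
ray_actor_options = service_conf.get("ray_actor_options", {})
http_options = service_conf.get("http_options", {"host": "0.0.0.0", "port": 8001})
dashboard_port = service_conf.get("dashboard_port", 8265)
api_conf = config["api"]  # feature/matcher settings handed to ImageMatchingAPI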
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
imcui/api/core.py CHANGED
@@ -1,308 +1,308 @@
1
+ # api.py
2
+ import warnings
3
+ from pathlib import Path
4
+ from typing import Any, Dict, Optional
5
+
6
+ import cv2
7
+ import matplotlib.pyplot as plt
8
+ import numpy as np
9
+ import torch
10
+
11
+ from ..hloc import extract_features, logger, match_dense, match_features
12
+ from ..hloc.utils.viz import add_text, plot_keypoints
13
+ from ..ui.utils import filter_matches, get_feature_model, get_model
14
+ from ..ui.viz import display_matches, fig2im, plot_images
15
+
16
+ warnings.simplefilter("ignore")
17
+
18
+
19
+ class ImageMatchingAPI(torch.nn.Module):
20
+ default_conf = {
21
+ "ransac": {
22
+ "enable": True,
23
+ "estimator": "poselib",
24
+ "geometry": "homography",
25
+ "method": "RANSAC",
26
+ "reproj_threshold": 3,
27
+ "confidence": 0.9999,
28
+ "max_iter": 10000,
29
+ },
30
+ }
31
+
32
+ def __init__(
33
+ self,
34
+ conf: dict = {},
35
+ device: str = "cpu",
36
+ detect_threshold: float = 0.015,
37
+ max_keypoints: int = 1024,
38
+ match_threshold: float = 0.2,
39
+ ) -> None:
40
+ """
41
+ Initializes an instance of the ImageMatchingAPI class.
42
+
43
+ Args:
44
+ conf (dict): A dictionary containing the configuration parameters.
45
+ device (str, optional): The device to use for computation. Defaults to "cpu".
46
+ detect_threshold (float, optional): The threshold for detecting keypoints. Defaults to 0.015.
47
+ max_keypoints (int, optional): The maximum number of keypoints to extract. Defaults to 1024.
48
+ match_threshold (float, optional): The threshold for matching keypoints. Defaults to 0.2.
49
+
50
+ Returns:
51
+ None
52
+ """
53
+ super().__init__()
54
+ self.device = device
55
+ self.conf = {**self.default_conf, **conf}
56
+ self._updata_config(detect_threshold, max_keypoints, match_threshold)
57
+ self._init_models()
58
+ if device == "cuda":
59
+ memory_allocated = torch.cuda.memory_allocated(device)
60
+ memory_reserved = torch.cuda.memory_reserved(device)
61
+ logger.info(f"GPU memory allocated: {memory_allocated / 1024**2:.3f} MB")
62
+ logger.info(f"GPU memory reserved: {memory_reserved / 1024**2:.3f} MB")
63
+ self.pred = None
64
+
65
+ def parse_match_config(self, conf):
66
+ if conf["dense"]:
67
+ return {
68
+ **conf,
69
+ "matcher": match_dense.confs.get(conf["matcher"]["model"]["name"]),
70
+ "dense": True,
71
+ }
72
+ else:
73
+ return {
74
+ **conf,
75
+ "feature": extract_features.confs.get(conf["feature"]["model"]["name"]),
76
+ "matcher": match_features.confs.get(conf["matcher"]["model"]["name"]),
77
+ "dense": False,
78
+ }
79
+
80
+ def _updata_config(
81
+ self,
82
+ detect_threshold: float = 0.015,
83
+ max_keypoints: int = 1024,
84
+ match_threshold: float = 0.2,
85
+ ):
86
+ self.dense = self.conf["dense"]
87
+ if self.conf["dense"]:
88
+ try:
89
+ self.conf["matcher"]["model"]["match_threshold"] = match_threshold
90
+ except TypeError as e:
91
+ logger.error(e)
92
+ else:
93
+ self.conf["feature"]["model"]["max_keypoints"] = max_keypoints
94
+ self.conf["feature"]["model"]["keypoint_threshold"] = detect_threshold
95
+ self.extract_conf = self.conf["feature"]
96
+
97
+ self.match_conf = self.conf["matcher"]
98
+
99
+ def _init_models(self):
100
+ # initialize matcher
101
+ self.matcher = get_model(self.match_conf)
102
+ # initialize extractor
103
+ if self.dense:
104
+ self.extractor = None
105
+ else:
106
+ self.extractor = get_feature_model(self.conf["feature"])
107
+
108
+ def _forward(self, img0, img1):
109
+ if self.dense:
110
+ pred = match_dense.match_images(
111
+ self.matcher,
112
+ img0,
113
+ img1,
114
+ self.match_conf["preprocessing"],
115
+ device=self.device,
116
+ )
117
+ last_fixed = "{}".format( # noqa: F841
118
+ self.match_conf["model"]["name"]
119
+ )
120
+ else:
121
+ pred0 = extract_features.extract(
122
+ self.extractor, img0, self.extract_conf["preprocessing"]
123
+ )
124
+ pred1 = extract_features.extract(
125
+ self.extractor, img1, self.extract_conf["preprocessing"]
126
+ )
127
+ pred = match_features.match_images(self.matcher, pred0, pred1)
128
+ return pred
129
+
130
+ def _convert_pred(self, pred):
131
+ ret = {
132
+ k: v.cpu().detach()[0].numpy() if isinstance(v, torch.Tensor) else v
133
+ for k, v in pred.items()
134
+ }
135
+ ret = {
136
+ k: v[0].cpu().detach().numpy() if isinstance(v, list) else v
137
+ for k, v in ret.items()
138
+ }
139
+ return ret
140
+
141
+ @torch.inference_mode()
142
+ def extract(self, img0: np.ndarray, **kwargs) -> Dict[str, np.ndarray]:
143
+ """Extract features from a single image.
144
+
145
+ Args:
146
+ img0 (np.ndarray): image
147
+
148
+ Returns:
149
+ Dict[str, np.ndarray]: feature dict
150
+ """
151
+
152
+ # setting prams
153
+ self.extractor.conf["max_keypoints"] = kwargs.get("max_keypoints", 512)
154
+ self.extractor.conf["keypoint_threshold"] = kwargs.get(
155
+ "keypoint_threshold", 0.0
156
+ )
157
+
158
+ pred = extract_features.extract(
159
+ self.extractor, img0, self.extract_conf["preprocessing"]
160
+ )
161
+ pred = self._convert_pred(pred)
162
+ # back to origin scale
163
+ s0 = pred["original_size"] / pred["size"]
164
+ pred["keypoints_orig"] = (
165
+ match_features.scale_keypoints(pred["keypoints"] + 0.5, s0) - 0.5
166
+ )
167
+ # TODO: rotate back
168
+ binarize = kwargs.get("binarize", False)
169
+ if binarize:
170
+ assert "descriptors" in pred
171
+ pred["descriptors"] = (pred["descriptors"] > 0).astype(np.uint8)
172
+ pred["descriptors"] = pred["descriptors"].T # N x DIM
173
+ return pred
174
+
175
+ @torch.inference_mode()
176
+ def forward(
177
+ self,
178
+ img0: np.ndarray,
179
+ img1: np.ndarray,
180
+ ) -> Dict[str, np.ndarray]:
181
+ """
182
+ Forward pass of the image matching API.
183
+
184
+ Args:
185
+ img0: A 3D NumPy array of shape (H, W, C) representing the first image.
186
+ Values are in the range [0, 1] and are in RGB mode.
187
+ img1: A 3D NumPy array of shape (H, W, C) representing the second image.
188
+ Values are in the range [0, 1] and are in RGB mode.
189
+
190
+ Returns:
191
+ A dictionary containing the following keys:
192
+ - image0_orig: The original image 0.
193
+ - image1_orig: The original image 1.
194
+ - keypoints0_orig: The keypoints detected in image 0.
195
+ - keypoints1_orig: The keypoints detected in image 1.
196
+ - mkeypoints0_orig: The raw matches between image 0 and image 1.
197
+ - mkeypoints1_orig: The raw matches between image 1 and image 0.
198
+ - mmkeypoints0_orig: The RANSAC inliers in image 0.
199
+ - mmkeypoints1_orig: The RANSAC inliers in image 1.
200
+ - mconf: The confidence scores for the raw matches.
201
+ - mmconf: The confidence scores for the RANSAC inliers.
202
+ """
203
+ # Take as input a pair of images (not a batch)
204
+ assert isinstance(img0, np.ndarray)
205
+ assert isinstance(img1, np.ndarray)
206
+ self.pred = self._forward(img0, img1)
207
+ if self.conf["ransac"]["enable"]:
208
+ self.pred = self._geometry_check(self.pred)
209
+ return self.pred
210
+
211
+ def _geometry_check(
212
+ self,
213
+ pred: Dict[str, Any],
214
+ ) -> Dict[str, Any]:
215
+ """
216
+ Filter matches using RANSAC. If keypoints are available, filter by keypoints.
217
+ If lines are available, filter by lines. If both keypoints and lines are
218
+ available, filter by keypoints.
219
+
220
+ Args:
221
+ pred (Dict[str, Any]): dict of matches, including original keypoints.
222
+ See :func:`filter_matches` for the expected keys.
223
+
224
+ Returns:
225
+ Dict[str, Any]: filtered matches
226
+ """
227
+ pred = filter_matches(
228
+ pred,
229
+ ransac_method=self.conf["ransac"]["method"],
230
+ ransac_reproj_threshold=self.conf["ransac"]["reproj_threshold"],
231
+ ransac_confidence=self.conf["ransac"]["confidence"],
232
+ ransac_max_iter=self.conf["ransac"]["max_iter"],
233
+ )
234
+ return pred
235
+
236
+ def visualize(
237
+ self,
238
+ log_path: Optional[Path] = None,
239
+ ) -> None:
240
+ """
241
+ Visualize the matches.
242
+
243
+ Args:
244
+ log_path (Path, optional): The directory to save the images. Defaults to None.
245
+
246
+ Returns:
247
+ None
248
+ """
249
+ if self.conf["dense"]:
250
+ postfix = str(self.conf["matcher"]["model"]["name"])
251
+ else:
252
+ postfix = "{}_{}".format(
253
+ str(self.conf["feature"]["model"]["name"]),
254
+ str(self.conf["matcher"]["model"]["name"]),
255
+ )
256
+ titles = [
257
+ "Image 0 - Keypoints",
258
+ "Image 1 - Keypoints",
259
+ ]
260
+ pred: Dict[str, Any] = self.pred
261
+ image0: np.ndarray = pred["image0_orig"]
262
+ image1: np.ndarray = pred["image1_orig"]
263
+ output_keypoints: np.ndarray = plot_images(
264
+ [image0, image1], titles=titles, dpi=300
265
+ )
266
+ if "keypoints0_orig" in pred.keys() and "keypoints1_orig" in pred.keys():
267
+ plot_keypoints([pred["keypoints0_orig"], pred["keypoints1_orig"]])
268
+ text: str = (
269
+ f"# keypoints0: {len(pred['keypoints0_orig'])} \n"
270
+ + f"# keypoints1: {len(pred['keypoints1_orig'])}"
271
+ )
272
+ add_text(0, text, fs=15)
273
+ output_keypoints = fig2im(output_keypoints)
274
+ # plot images with raw matches
275
+ titles = [
276
+ "Image 0 - Raw matched keypoints",
277
+ "Image 1 - Raw matched keypoints",
278
+ ]
279
+ output_matches_raw, num_matches_raw = display_matches(
280
+ pred, titles=titles, tag="KPTS_RAW"
281
+ )
282
+ # plot images with ransac matches
283
+ titles = [
284
+ "Image 0 - Ransac matched keypoints",
285
+ "Image 1 - Ransac matched keypoints",
286
+ ]
287
+ output_matches_ransac, num_matches_ransac = display_matches(
288
+ pred, titles=titles, tag="KPTS_RANSAC"
289
+ )
290
+ if log_path is not None:
291
+ img_keypoints_path: Path = log_path / f"img_keypoints_{postfix}.png"
292
+ img_matches_raw_path: Path = log_path / f"img_matches_raw_{postfix}.png"
293
+ img_matches_ransac_path: Path = (
294
+ log_path / f"img_matches_ransac_{postfix}.png"
295
+ )
296
+ cv2.imwrite(
297
+ str(img_keypoints_path),
298
+ output_keypoints[:, :, ::-1].copy(), # RGB -> BGR
299
+ )
300
+ cv2.imwrite(
301
+ str(img_matches_raw_path),
302
+ output_matches_raw[:, :, ::-1].copy(), # RGB -> BGR
303
+ )
304
+ cv2.imwrite(
305
+ str(img_matches_ransac_path),
306
+ output_matches_ransac[:, :, ::-1].copy(), # RGB -> BGR
307
+ )
308
+ plt.close("all")
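A usage sketch of `ImageMatchingAPI` with a sparse extractor plus matcher (the `conf` layout mirrors the `api:` section of `config/api.yaml`; the image paths are placeholders, and images are passed as RGB uint8 arrays, the same form the server feeds in):

import numpy as np
from PIL import Image

from imcui.api import ImageMatchingAPI

conf = {
    "dense": False,
    "feature": {
        "output": "feats-superpoint-n4096-rmax1600",
        "model": {
            "name": "superpoint",
            "nms_radius": 3,
            "max_keypoints": 4096,
            "keypoint_threshold": 0.005,
        },
        "preprocessing": {
            "grayscale": True,
            "force_resize": True,
            "resize_max": 1600,
            "width": 640,
            "height": 480,
            "dfactor": 8,
        },
    },
    "matcher": {
        "output": "matches-NN-mutual",
        "model": {
            "name": "nearest_neighbor",
            "do_mutual_check": True,
            "match_threshold": 0.2,
        },
    },
}

api = ImageMatchingAPI(conf=conf, device="cpu")
img0 = np.array(Image.open("image0.jpg"))  # placeholder path
img1 = np.array(Image.open("image1.jpg"))  # placeholder path
pred = api(img0, img1)
print(len(pred["mmkeypoints0_orig"]), "RANSAC inliers")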
imcui/api/server.py CHANGED
@@ -1,170 +1,186 @@
- # server.py
- import warnings
- from pathlib import Path
- from typing import Union
-
- import numpy as np
- import ray
- import torch
- import yaml
- from fastapi import FastAPI, File, UploadFile
- from fastapi.responses import JSONResponse
- from PIL import Image
- from ray import serve
-
- from . import ImagesInput, to_base64_nparray
- from .core import ImageMatchingAPI
- from ..hloc import DEVICE
- from ..ui import get_version
-
- warnings.simplefilter("ignore")
- app = FastAPI()
- if ray.is_initialized():
-     ray.shutdown()
- ray.init(
-     dashboard_port=8265,
-     ignore_reinit_error=True,
- )
- serve.start(
-     http_options={"host": "0.0.0.0", "port": 8001},
- )
-
- num_gpus = 1 if torch.cuda.is_available() else 0
-
-
- @serve.deployment(
-     num_replicas=4, ray_actor_options={"num_cpus": 2, "num_gpus": num_gpus}
- )
- @serve.ingress(app)
- class ImageMatchingService:
-     def __init__(self, conf: dict, device: str):
-         self.conf = conf
-         self.api = ImageMatchingAPI(conf=conf, device=device)
-
-     @app.get("/")
-     def root(self):
-         return "Hello, world!"
-
-     @app.get("/version")
-     async def version(self):
-         return {"version": get_version()}
-
-     @app.post("/v1/match")
-     async def match(
-         self, image0: UploadFile = File(...), image1: UploadFile = File(...)
-     ):
-         """
-         Handle the image matching request and return the processed result.
-
-         Args:
-             image0 (UploadFile): The first image file for matching.
-             image1 (UploadFile): The second image file for matching.
-
-         Returns:
-             JSONResponse: A JSON response containing the filtered match results
-             or an error message in case of failure.
-         """
-         try:
-             # Load the images from the uploaded files
-             image0_array = self.load_image(image0)
-             image1_array = self.load_image(image1)
-
-             # Perform image matching using the API
-             output = self.api(image0_array, image1_array)
-
-             # Keys to skip in the output
-             skip_keys = ["image0_orig", "image1_orig"]
-
-             # Postprocess the output to filter unwanted data
-             pred = self.postprocess(output, skip_keys)
-
-             # Return the filtered prediction as a JSON response
-             return JSONResponse(content=pred)
-         except Exception as e:
-             # Return an error message with status code 500 in case of exception
-             return JSONResponse(content={"error": str(e)}, status_code=500)
-
-     @app.post("/v1/extract")
-     async def extract(self, input_info: ImagesInput):
-         """
-         Extract keypoints and descriptors from images.
-
-         Args:
-             input_info: An object containing the image data and options.
-
-         Returns:
-             A list of dictionaries containing the keypoints and descriptors.
-         """
-         try:
-             preds = []
-             for i, input_image in enumerate(input_info.data):
-                 # Load the image from the input data
-                 image_array = to_base64_nparray(input_image)
-                 # Extract keypoints and descriptors
-                 output = self.api.extract(
-                     image_array,
-                     max_keypoints=input_info.max_keypoints[i],
-                     binarize=input_info.binarize,
-                 )
-                 # Do not return the original image and image_orig
-                 # skip_keys = ["image", "image_orig"]
-                 skip_keys = []
-
-                 # Postprocess the output
-                 pred = self.postprocess(output, skip_keys)
-                 preds.append(pred)
-             # Return the list of extracted features
-             return JSONResponse(content=preds)
-         except Exception as e:
-             # Return an error message if an exception occurs
-             return JSONResponse(content={"error": str(e)}, status_code=500)
-
-     def load_image(self, file_path: Union[str, UploadFile]) -> np.ndarray:
-         """
-         Reads an image from a file path or an UploadFile object.
-
-         Args:
-             file_path: A file path or an UploadFile object.
-
-         Returns:
-             A numpy array representing the image.
-         """
-         if isinstance(file_path, str):
-             file_path = Path(file_path).resolve(strict=False)
-         else:
-             file_path = file_path.file
-         with Image.open(file_path) as img:
-             image_array = np.array(img)
-         return image_array
-
-     def postprocess(self, output: dict, skip_keys: list, binarize: bool = True) -> dict:
-         pred = {}
-         for key, value in output.items():
-             if key in skip_keys:
-                 continue
-             if isinstance(value, np.ndarray):
-                 pred[key] = value.tolist()
-         return pred
-
-     def run(self, host: str = "0.0.0.0", port: int = 8001):
-         import uvicorn
-
-         uvicorn.run(app, host=host, port=port)
-
-
- def read_config(config_path: Path) -> dict:
-     with open(config_path, "r") as f:
-         conf = yaml.safe_load(f)
-     return conf
-
-
- # api server
- conf = read_config(Path(__file__).parent / "config/api.yaml")
- service = ImageMatchingService.bind(conf=conf["api"], device=DEVICE)
- handle = serve.run(service, route_prefix="/")
-
- # serve run api.server_ray:service
-
- # build to generate config file
- # serve build api.server_ray:service -o api/config/ray.yaml
- # serve run api/config/ray.yaml

+ # server.py
+ import warnings
+ from pathlib import Path
+ from typing import Union
+
+ import numpy as np
+ import ray
+ import torch
+ from fastapi import FastAPI, File, UploadFile
+ from fastapi.responses import JSONResponse
+ from PIL import Image
+ from ray import serve
+ import argparse
+
+ from . import ImagesInput, to_base64_nparray
+ from .core import ImageMatchingAPI
+ from ..hloc import DEVICE
+ from ..hloc.utils.io import read_yaml
+ from ..ui import get_version
+
+ warnings.simplefilter("ignore")
+ app = FastAPI()
+ if ray.is_initialized():
+     ray.shutdown()
+
+
+ # read some configs
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+     "--config",
+     type=Path,
+     required=False,
+     default=Path(__file__).parent / "config/api.yaml",
+ )
+ args = parser.parse_args()
+ config_path = args.config
+ config = read_yaml(config_path)
+ num_gpus = 1 if torch.cuda.is_available() else 0
+ ray_actor_options = config["service"].get("ray_actor_options", {})
+ ray_actor_options.update({"num_gpus": num_gpus})
+ dashboard_port = config["service"].get("dashboard_port", 8265)
+ http_options = config["service"].get(
+     "http_options",
+     {
+         "host": "0.0.0.0",
+         "port": 8001,
+     },
+ )
+ num_replicas = config["service"].get("num_replicas", 4)
+ ray.init(
+     dashboard_port=dashboard_port,
+     ignore_reinit_error=True,
+ )
+ serve.start(http_options=http_options)
+
+
+ @serve.deployment(
+     num_replicas=num_replicas,
+     ray_actor_options=ray_actor_options,
+ )
+ @serve.ingress(app)
+ class ImageMatchingService:
+     def __init__(self, conf: dict, device: str, **kwargs):
+         self.conf = conf
+         self.api = ImageMatchingAPI(conf=conf, device=device)
+
+     @app.get("/")
+     def root(self):
+         return "Hello, world!"
+
+     @app.get("/version")
+     async def version(self):
+         return {"version": get_version()}
+
+     @app.post("/v1/match")
+     async def match(
+         self, image0: UploadFile = File(...), image1: UploadFile = File(...)
+     ):
+         """
+         Handle the image matching request and return the processed result.
+
+         Args:
+             image0 (UploadFile): The first image file for matching.
+             image1 (UploadFile): The second image file for matching.
+
+         Returns:
+             JSONResponse: A JSON response containing the filtered match results
+             or an error message in case of failure.
+         """
+         try:
+             # Load the images from the uploaded files
+             image0_array = self.load_image(image0)
+             image1_array = self.load_image(image1)
+
+             # Perform image matching using the API
+             output = self.api(image0_array, image1_array)
+
+             # Keys to skip in the output
+             skip_keys = ["image0_orig", "image1_orig"]
+
+             # Postprocess the output to filter unwanted data
+             pred = self.postprocess(output, skip_keys)
+
+             # Return the filtered prediction as a JSON response
+             return JSONResponse(content=pred)
+         except Exception as e:
+             # Return an error message with status code 500 in case of exception
+             return JSONResponse(content={"error": str(e)}, status_code=500)
+
+     @app.post("/v1/extract")
+     async def extract(self, input_info: ImagesInput):
+         """
+         Extract keypoints and descriptors from images.
+
+         Args:
+             input_info: An object containing the image data and options.
+
+         Returns:
+             A list of dictionaries containing the keypoints and descriptors.
+         """
+         try:
+             preds = []
+             for i, input_image in enumerate(input_info.data):
+                 # Load the image from the input data
+                 image_array = to_base64_nparray(input_image)
+                 # Extract keypoints and descriptors
+                 output = self.api.extract(
+                     image_array,
+                     max_keypoints=input_info.max_keypoints[i],
+                     binarize=input_info.binarize,
+                 )
+                 # Do not return the original image and image_orig
+                 # skip_keys = ["image", "image_orig"]
+                 skip_keys = []
+
+                 # Postprocess the output
+                 pred = self.postprocess(output, skip_keys)
+                 preds.append(pred)
+             # Return the list of extracted features
+             return JSONResponse(content=preds)
+         except Exception as e:
+             # Return an error message if an exception occurs
+             return JSONResponse(content={"error": str(e)}, status_code=500)
+
+     def load_image(self, file_path: Union[str, UploadFile]) -> np.ndarray:
+         """
+         Reads an image from a file path or an UploadFile object.
+
+         Args:
+             file_path: A file path or an UploadFile object.
+
+         Returns:
+             A numpy array representing the image.
+         """
+         if isinstance(file_path, str):
+             file_path = Path(file_path).resolve(strict=False)
+         else:
+             file_path = file_path.file
+         with Image.open(file_path) as img:
+             image_array = np.array(img)
+         return image_array
+
+     def postprocess(self, output: dict, skip_keys: list, **kwargs) -> dict:
+         pred = {}
+         for key, value in output.items():
+             if key in skip_keys:
+                 continue
+             if isinstance(value, np.ndarray):
+                 pred[key] = value.tolist()
+         return pred
+
+     def run(self, host: str = "0.0.0.0", port: int = 8001):
+         import uvicorn
+
+         uvicorn.run(app, host=host, port=port)
+
+
+ if __name__ == "__main__":
+     # api server
+     service = ImageMatchingService.bind(conf=config["api"], device=DEVICE)
+     handle = serve.run(service, route_prefix="/", blocking=False)
+
+     # serve run api.server_ray:service
+     # build to generate config file
+     # serve build api.server_ray:service -o api/config/ray.yaml
+     # serve run api/config/ray.yaml
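A quick smoke test against a running instance (a sketch; it assumes the server above is up on its default port 8001 and that `image.jpg` is a local placeholder image):

import base64

import requests

ENDPOINT = "http://127.0.0.1:8001"

print(requests.get(f"{ENDPOINT}/version").json())

with open("image.jpg", "rb") as f:  # placeholder image path
    b64 = base64.b64encode(f.read()).decode("utf-8")

payload = {
    "data": [b64],
    "max_keypoints": [512],
    "binarize": False,
}
r = requests.post(f"{ENDPOINT}/v1/extract", json=payload)
r.raise_for_status()
print("keypoints:", len(r.json()[0]["keypoints"]))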
imcui/api/test/build_and_run.sh CHANGED
@@ -1,16 +1,16 @@
+ # g++ main.cpp -I/usr/include/opencv4 -lcurl -ljsoncpp -lb64 -lopencv_core -lopencv_imgcodecs -o main
+ # sudo apt-get update
+ # sudo apt-get install libboost-all-dev -y
+ # sudo apt-get install libcurl4-openssl-dev libjsoncpp-dev libb64-dev libopencv-dev -y
+
+ cd build
+ cmake ..
+ make -j12
+
+ echo " ======== RUN DEMO ========"
+
+ ./client
+
+ echo " ======== END DEMO ========"
+
+ cd ..
imcui/api/test/client.cpp CHANGED
@@ -1,81 +1,81 @@
1
+ #include <curl/curl.h>
2
+ #include <opencv2/opencv.hpp>
3
+ #include "helper.h"
4
+
5
+ int main() {
6
+ std::string img_path =
7
+ "../../../datasets/sacre_coeur/mapping_rot/02928139_3448003521_rot45.jpg";
8
+ cv::Mat original_img = cv::imread(img_path, cv::IMREAD_GRAYSCALE);
9
+
10
+ if (original_img.empty()) {
11
+ throw std::runtime_error("Failed to decode image");
12
+ }
13
+
14
+ // Convert the image to Base64
15
+ std::string base64_img = image_to_base64(original_img);
16
+
17
+ // Convert the Base64 back to an image
18
+ cv::Mat decoded_img = base64_to_image(base64_img);
19
+ cv::imwrite("decoded_image.jpg", decoded_img);
20
+ cv::imwrite("original_img.jpg", original_img);
21
+
22
+ // The images should be identical
23
+ if (cv::countNonZero(original_img != decoded_img) != 0) {
24
+ std::cerr << "The images are not identical" << std::endl;
25
+ return -1;
26
+ } else {
27
+ std::cout << "The images are identical!" << std::endl;
28
+ }
29
+
30
+ // construct params
31
+ APIParams params{.data = {base64_img},
32
+ .max_keypoints = {100, 100},
33
+ .timestamps = {"0", "1"},
34
+ .grayscale = {0},
35
+ .image_hw = {{480, 640}, {240, 320}},
36
+ .feature_type = 0,
37
+ .rotates = {0.0f, 0.0f},
38
+ .scales = {1.0f, 1.0f},
39
+ .reference_points = {{1.23e+2f, 1.2e+1f},
40
+ {5.0e-1f, 3.0e-1f},
41
+ {2.3e+2f, 2.2e+1f},
42
+ {6.0e-1f, 4.0e-1f}},
43
+ .binarize = {1}};
44
+
45
+ KeyPointResults kpts_results;
46
+
47
+ // Convert the parameters to JSON
48
+ Json::Value jsonData = paramsToJson(params);
49
+ std::string url = "http://127.0.0.1:8001/v1/extract";
50
+ Json::StreamWriterBuilder writer;
51
+ std::string output = Json::writeString(writer, jsonData);
52
+
53
+ CURL* curl;
54
+ CURLcode res;
55
+ std::string readBuffer;
56
+
57
+ curl_global_init(CURL_GLOBAL_DEFAULT);
58
+ curl = curl_easy_init();
59
+ if (curl) {
60
+ struct curl_slist* hs = NULL;
61
+ hs = curl_slist_append(hs, "Content-Type: application/json");
62
+ curl_easy_setopt(curl, CURLOPT_HTTPHEADER, hs);
63
+ curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
64
+ curl_easy_setopt(curl, CURLOPT_POSTFIELDS, output.c_str());
65
+ curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
66
+ curl_easy_setopt(curl, CURLOPT_WRITEDATA, &readBuffer);
67
+ res = curl_easy_perform(curl);
68
+
69
+ if (res != CURLE_OK)
70
+ fprintf(
71
+ stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(res));
72
+ else {
73
+ // std::cout << "Response from server: " << readBuffer << std::endl;
74
+ kpts_results = decode_response(readBuffer);
75
+ }
76
+ curl_easy_cleanup(curl);
77
+ }
78
+ curl_global_cleanup();
79
+
80
+ return 0;
81
+ }
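For comparison, the JSON body assembled by the C++ test client above corresponds to the following Python request (a sketch; the base64 image string is a placeholder):

import requests

params = {
    "data": ["<base64-encoded image>"],  # placeholder
    "max_keypoints": [100, 100],
    "timestamps": ["0", "1"],
    "grayscale": 0,
    "image_hw": [[480, 640], [240, 320]],
    "feature_type": 0,
    "rotates": [0.0, 0.0],
    "scales": [1.0, 1.0],
    "reference_points": [[123.0, 12.0], [0.5, 0.3], [230.0, 22.0], [0.6, 0.4]],
    "binarize": 1,
}
response = requests.post("http://127.0.0.1:8001/v1/extract", json=params)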
imcui/api/test/helper.h CHANGED
@@ -1,405 +1,405 @@
1
-
2
- #include <b64/encode.h>
3
- #include <fstream>
4
- #include <jsoncpp/json/json.h>
5
- #include <opencv2/opencv.hpp>
6
- #include <sstream>
7
- #include <vector>
8
-
9
- // base64 to image
10
- #include <boost/archive/iterators/base64_from_binary.hpp>
11
- #include <boost/archive/iterators/binary_from_base64.hpp>
12
- #include <boost/archive/iterators/transform_width.hpp>
13
-
14
- /// Parameters used in the API
15
- struct APIParams {
16
- /// A list of images, base64 encoded
17
- std::vector<std::string> data;
18
-
19
- /// The maximum number of keypoints to detect for each image
20
- std::vector<int> max_keypoints;
21
-
22
- /// The timestamps of the images
23
- std::vector<std::string> timestamps;
24
-
25
- /// Whether to convert the images to grayscale
26
- bool grayscale;
27
-
28
- /// The height and width of each image
29
- std::vector<std::vector<int>> image_hw;
30
-
31
- /// The type of feature detector to use
32
- int feature_type;
33
-
34
- /// The rotations of the images
35
- std::vector<double> rotates;
36
-
37
- /// The scales of the images
38
- std::vector<double> scales;
39
-
40
- /// The reference points of the images
41
- std::vector<std::vector<float>> reference_points;
42
-
43
- /// Whether to binarize the descriptors
44
- bool binarize;
45
- };
46
-
47
- /**
48
- * @brief Contains the results of a keypoint detector.
49
- *
50
- * @details Stores the keypoints and descriptors for each image.
51
- */
52
- class KeyPointResults {
53
- public:
54
- KeyPointResults() {
55
- }
56
-
57
- /**
58
- * @brief Constructor.
59
- *
60
- * @param kp The keypoints for each image.
61
- */
62
- KeyPointResults(const std::vector<std::vector<cv::KeyPoint>>& kp,
63
- const std::vector<cv::Mat>& desc)
64
- : keypoints(kp), descriptors(desc) {
65
- }
66
-
67
- /**
68
- * @brief Append keypoints to the result.
69
- *
70
- * @param kpts The keypoints to append.
71
- */
72
- inline void append_keypoints(std::vector<cv::KeyPoint>& kpts) {
73
- keypoints.emplace_back(kpts);
74
- }
75
-
76
- /**
77
- * @brief Append descriptors to the result.
78
- *
79
- * @param desc The descriptors to append.
80
- */
81
- inline void append_descriptors(cv::Mat& desc) {
82
- descriptors.emplace_back(desc);
83
- }
84
-
85
- /**
86
- * @brief Get the keypoints.
87
- *
88
- * @return The keypoints.
89
- */
90
- inline std::vector<std::vector<cv::KeyPoint>> get_keypoints() {
91
- return keypoints;
92
- }
93
-
94
- /**
95
- * @brief Get the descriptors.
96
- *
97
- * @return The descriptors.
98
- */
99
- inline std::vector<cv::Mat> get_descriptors() {
100
- return descriptors;
101
- }
102
-
103
- private:
104
- std::vector<std::vector<cv::KeyPoint>> keypoints;
105
- std::vector<cv::Mat> descriptors;
106
- std::vector<std::vector<float>> scores;
107
- };
108
-
109
- /**
110
- * @brief Decodes a base64 encoded string.
111
- *
112
- * @param base64 The base64 encoded string to decode.
113
- * @return The decoded string.
114
- */
115
- std::string base64_decode(const std::string& base64) {
116
- using namespace boost::archive::iterators;
117
- using It = transform_width<binary_from_base64<std::string::const_iterator>, 8, 6>;
118
-
119
- // Find the position of the last non-whitespace character
120
- auto end = base64.find_last_not_of(" \t\n\r");
121
- if (end != std::string::npos) {
122
- // Move one past the last non-whitespace character
123
- end += 1;
124
- }
125
-
126
- // Decode the base64 string and return the result
127
- return std::string(It(base64.begin()), It(base64.begin() + end));
128
- }
129
-
130
- /**
131
- * @brief Decodes a base64 string into an OpenCV image
132
- *
133
- * @param base64 The base64 encoded string
134
- * @return The decoded OpenCV image
135
- */
136
- cv::Mat base64_to_image(const std::string& base64) {
137
- // Decode the base64 string
138
- std::string decodedStr = base64_decode(base64);
139
-
140
- // Decode the image
141
- std::vector<uchar> data(decodedStr.begin(), decodedStr.end());
142
- cv::Mat img = cv::imdecode(data, cv::IMREAD_GRAYSCALE);
143
-
144
- // Check for errors
145
- if (img.empty()) {
146
- throw std::runtime_error("Failed to decode image");
147
- }
148
-
149
- return img;
150
- }
151
-
152
- /**
153
- * @brief Encodes an OpenCV image into a base64 string
154
- *
155
- * This function takes an OpenCV image and encodes it into a base64 string.
156
- * The image is first encoded as a PNG image, and then the resulting
157
- * bytes are encoded as a base64 string.
158
- *
159
- * @param img The OpenCV image
160
- * @return The base64 encoded string
161
- *
162
- * @throws std::runtime_error if the image is empty or encoding fails
163
- */
164
- std::string image_to_base64(cv::Mat& img) {
165
- if (img.empty()) {
166
- throw std::runtime_error("Failed to read image");
167
- }
168
-
169
- // Encode the image as a PNG
170
- std::vector<uchar> buf;
171
- if (!cv::imencode(".png", img, buf)) {
172
- throw std::runtime_error("Failed to encode image");
173
- }
174
-
175
- // Encode the bytes as a base64 string
176
- using namespace boost::archive::iterators;
177
- using It =
178
- base64_from_binary<transform_width<std::vector<uchar>::const_iterator, 6, 8>>;
179
- std::string base64(It(buf.begin()), It(buf.end()));
180
-
181
- // Pad the string with '=' characters to a multiple of 4 bytes
182
- base64.append((3 - buf.size() % 3) % 3, '=');
183
-
184
- return base64;
185
- }
186
-
187
- /**
188
- * @brief Callback function for libcurl to write data to a string
189
- *
190
- * This function is used as a callback for libcurl to write data to a string.
191
- * It takes the contents, size, and nmemb as parameters, and writes the data to
192
- * the string.
193
- *
194
- * @param contents The data to write
195
- * @param size The size of the data
196
- * @param nmemb The number of members in the data
197
- * @param s The string to write the data to
198
- * @return The number of bytes written
199
- */
200
- size_t WriteCallback(void* contents, size_t size, size_t nmemb, std::string* s) {
201
- size_t newLength = size * nmemb;
202
- try {
203
- // Resize the string to fit the new data
204
- s->resize(s->size() + newLength);
205
- } catch (std::bad_alloc& e) {
206
- // If there's an error allocating memory, return 0
207
- return 0;
208
- }
209
-
210
- // Copy the data to the string
211
- std::copy(static_cast<const char*>(contents),
212
- static_cast<const char*>(contents) + newLength,
213
- s->begin() + s->size() - newLength);
214
- return newLength;
215
- }
216
-
217
- // Helper functions
218
-
219
- /**
220
- * @brief Helper function to convert a type to a Json::Value
221
- *
222
- * This function takes a value of type T and converts it to a Json::Value.
223
- * It is used to simplify the process of converting a type to a Json::Value.
224
- *
225
- * @param val The value to convert
226
- * @return The converted Json::Value
227
- */
228
- template <typename T> Json::Value toJson(const T& val) {
229
- return Json::Value(val);
230
- }
231
-
232
- /**
233
- * @brief Converts a vector to a Json::Value
234
- *
235
- * This function takes a vector of type T and converts it to a Json::Value.
236
- * Each element in the vector is appended to the Json::Value array.
237
- *
238
- * @param vec The vector to convert to Json::Value
239
- * @return The Json::Value representing the vector
240
- */
241
- template <typename T> Json::Value vectorToJson(const std::vector<T>& vec) {
242
- Json::Value json(Json::arrayValue);
243
- for (const auto& item : vec) {
244
- json.append(item);
245
- }
246
- return json;
247
- }
248
-
249
- /**
250
- * @brief Converts a nested vector to a Json::Value
251
- *
252
- * This function takes a nested vector of type T and converts it to a
253
- * Json::Value. Each sub-vector is converted to a Json::Value array and appended
254
- * to the main Json::Value array.
255
- *
256
- * @param vec The nested vector to convert to Json::Value
257
- * @return The Json::Value representing the nested vector
258
- */
259
- template <typename T>
260
- Json::Value nestedVectorToJson(const std::vector<std::vector<T>>& vec) {
261
- Json::Value json(Json::arrayValue);
262
- for (const auto& subVec : vec) {
263
- json.append(vectorToJson(subVec));
264
- }
265
- return json;
266
- }
267
-
268
- /**
269
- * @brief Converts the APIParams struct to a Json::Value
270
- *
271
- * This function takes an APIParams struct and converts it to a Json::Value.
272
- * The Json::Value is a JSON object with the following fields:
273
- * - data: a JSON array of base64 encoded images
274
- * - max_keypoints: a JSON array of integers, max number of keypoints for each
275
- * image
276
- * - timestamps: a JSON array of timestamps, one for each image
277
- * - grayscale: a JSON boolean, whether to convert images to grayscale
278
- * - image_hw: a nested JSON array, each sub-array contains the height and width
279
- * of an image
280
- * - feature_type: a JSON integer, the type of feature detector to use
281
- * - rotates: a JSON array of doubles, the rotation of each image
282
- * - scales: a JSON array of doubles, the scale of each image
283
- * - reference_points: a nested JSON array, each sub-array contains the
284
- * reference points of an image
285
- * - binarize: a JSON boolean, whether to binarize the descriptors
286
- *
287
- * @param params The APIParams struct to convert
288
- * @return The Json::Value representing the APIParams struct
289
- */
290
- Json::Value paramsToJson(const APIParams& params) {
291
- Json::Value json;
292
- json["data"] = vectorToJson(params.data);
293
- json["max_keypoints"] = vectorToJson(params.max_keypoints);
294
- json["timestamps"] = vectorToJson(params.timestamps);
295
- json["grayscale"] = toJson(params.grayscale);
296
- json["image_hw"] = nestedVectorToJson(params.image_hw);
297
- json["feature_type"] = toJson(params.feature_type);
298
- json["rotates"] = vectorToJson(params.rotates);
299
- json["scales"] = vectorToJson(params.scales);
300
- json["reference_points"] = nestedVectorToJson(params.reference_points);
301
- json["binarize"] = toJson(params.binarize);
302
- return json;
303
- }
304
-
305
- template <typename T> cv::Mat jsonToMat(Json::Value json) {
306
- int rows = json.size();
307
- int cols = json[0].size();
308
-
309
- // Create a single array to hold all the data.
310
- std::vector<T> data;
311
- data.reserve(rows * cols);
312
-
313
- for (int i = 0; i < rows; i++) {
314
- for (int j = 0; j < cols; j++) {
315
- data.push_back(static_cast<T>(json[i][j].asInt()));
316
- }
317
- }
318
-
319
- // Create a cv::Mat object that points to the data.
320
- cv::Mat mat(rows, cols, CV_8UC1,
321
- data.data()); // Change the type if necessary.
322
- // cv::Mat mat(cols, rows,CV_8UC1, data.data()); // Change the type if
323
- // necessary.
324
-
325
- return mat;
326
- }
327
-
328
- /**
329
- * @brief Decodes the response of the server and prints the keypoints
330
- *
331
- * This function takes the response of the server, a JSON string, and decodes
332
- * it. It then prints the keypoints and draws them on the original image.
333
- *
334
- * @param response The response of the server
335
- * @return The keypoints and descriptors
336
- */
337
- KeyPointResults decode_response(const std::string& response, bool viz = true) {
338
- Json::CharReaderBuilder builder;
339
- Json::CharReader* reader = builder.newCharReader();
340
-
341
- Json::Value jsonData;
342
- std::string errors;
343
-
344
- // Parse the JSON response
345
- bool parsingSuccessful = reader->parse(
346
- response.c_str(), response.c_str() + response.size(), &jsonData, &errors);
347
- delete reader;
348
-
349
- if (!parsingSuccessful) {
350
- // Handle error
351
- std::cout << "Failed to parse the JSON, errors:" << std::endl;
352
- std::cout << errors << std::endl;
353
- return KeyPointResults();
354
- }
355
-
356
- KeyPointResults kpts_results;
357
-
358
- // Iterate over the images
359
- for (const auto& jsonItem : jsonData) {
360
- auto jkeypoints = jsonItem["keypoints"];
361
- auto jkeypoints_orig = jsonItem["keypoints_orig"];
362
- auto jdescriptors = jsonItem["descriptors"];
363
- auto jscores = jsonItem["scores"];
364
- auto jimageSize = jsonItem["image_size"];
365
- auto joriginalSize = jsonItem["original_size"];
366
- auto jsize = jsonItem["size"];
367
-
368
- std::vector<cv::KeyPoint> vkeypoints;
369
- std::vector<float> vscores;
370
-
371
- // Iterate over the keypoints
372
- int counter = 0;
373
- for (const auto& keypoint : jkeypoints_orig) {
374
- if (counter < 10) {
375
- // Print the first 10 keypoints
376
- std::cout << keypoint[0].asFloat() << ", " << keypoint[1].asFloat()
377
- << std::endl;
378
- }
379
- counter++;
380
- // Convert the Json::Value to a cv::KeyPoint
381
- vkeypoints.emplace_back(
382
- cv::KeyPoint(keypoint[0].asFloat(), keypoint[1].asFloat(), 0.0));
383
- }
384
-
385
- if (viz && jsonItem.isMember("image_orig")) {
386
- auto jimg_orig = jsonItem["image_orig"];
387
- cv::Mat img = jsonToMat<uchar>(jimg_orig);
388
- cv::imwrite("viz_image_orig.jpg", img);
389
-
390
- // Draw keypoints on the image
391
- cv::Mat imgWithKeypoints;
392
- cv::drawKeypoints(img, vkeypoints, imgWithKeypoints, cv::Scalar(0, 0, 255));
393
-
394
- // Write the image with keypoints
395
- std::string filename = "viz_image_orig_keypoints.jpg";
396
- cv::imwrite(filename, imgWithKeypoints);
397
- }
398
-
399
- // Iterate over the descriptors
400
- cv::Mat descriptors = jsonToMat<uchar>(jdescriptors);
401
- kpts_results.append_keypoints(vkeypoints);
402
- kpts_results.append_descriptors(descriptors);
403
- }
404
- return kpts_results;
405
- }
 
1
+
2
+ #include <b64/encode.h>
3
+ #include <fstream>
4
+ #include <jsoncpp/json/json.h>
5
+ #include <opencv2/opencv.hpp>
6
+ #include <sstream>
7
+ #include <vector>
8
+
9
+ // base64 to image
10
+ #include <boost/archive/iterators/base64_from_binary.hpp>
11
+ #include <boost/archive/iterators/binary_from_base64.hpp>
12
+ #include <boost/archive/iterators/transform_width.hpp>
13
+
14
+ /// Parameters used in the API
15
+ struct APIParams {
16
+ /// A list of images, base64 encoded
17
+ std::vector<std::string> data;
18
+
19
+ /// The maximum number of keypoints to detect for each image
20
+ std::vector<int> max_keypoints;
21
+
22
+ /// The timestamps of the images
23
+ std::vector<std::string> timestamps;
24
+
25
+ /// Whether to convert the images to grayscale
26
+ bool grayscale;
27
+
28
+ /// The height and width of each image
29
+ std::vector<std::vector<int>> image_hw;
30
+
31
+ /// The type of feature detector to use
32
+ int feature_type;
33
+
34
+ /// The rotations of the images
35
+ std::vector<double> rotates;
36
+
37
+ /// The scales of the images
38
+ std::vector<double> scales;
39
+
40
+ /// The reference points of the images
41
+ std::vector<std::vector<float>> reference_points;
42
+
43
+ /// Whether to binarize the descriptors
44
+ bool binarize;
45
+ };
46
+
47
+ /**
48
+ * @brief Contains the results of a keypoint detector.
49
+ *
50
+ * @details Stores the keypoints and descriptors for each image.
51
+ */
52
+ class KeyPointResults {
53
+ public:
54
+ KeyPointResults() {
55
+ }
56
+
57
+ /**
58
+ * @brief Constructor.
59
+ *
60
+ * @param kp The keypoints for each image.
61
+ */
62
+ KeyPointResults(const std::vector<std::vector<cv::KeyPoint>>& kp,
63
+ const std::vector<cv::Mat>& desc)
64
+ : keypoints(kp), descriptors(desc) {
65
+ }
66
+
67
+ /**
68
+ * @brief Append keypoints to the result.
69
+ *
70
+ * @param kpts The keypoints to append.
71
+ */
72
+ inline void append_keypoints(std::vector<cv::KeyPoint>& kpts) {
73
+ keypoints.emplace_back(kpts);
74
+ }
75
+
76
+ /**
77
+ * @brief Append descriptors to the result.
78
+ *
79
+ * @param desc The descriptors to append.
80
+ */
81
+ inline void append_descriptors(cv::Mat& desc) {
82
+ descriptors.emplace_back(desc);
83
+ }
84
+
85
+ /**
86
+ * @brief Get the keypoints.
87
+ *
88
+ * @return The keypoints.
89
+ */
90
+ inline std::vector<std::vector<cv::KeyPoint>> get_keypoints() {
91
+ return keypoints;
92
+ }
93
+
94
+ /**
95
+ * @brief Get the descriptors.
96
+ *
97
+ * @return The descriptors.
98
+ */
99
+ inline std::vector<cv::Mat> get_descriptors() {
100
+ return descriptors;
101
+ }
102
+
103
+ private:
104
+ std::vector<std::vector<cv::KeyPoint>> keypoints;
105
+ std::vector<cv::Mat> descriptors;
106
+ std::vector<std::vector<float>> scores;
107
+ };
108
+
109
+ /**
110
+ * @brief Decodes a base64 encoded string.
111
+ *
112
+ * @param base64 The base64 encoded string to decode.
113
+ * @return The decoded string.
114
+ */
115
+ std::string base64_decode(const std::string& base64) {
116
+ using namespace boost::archive::iterators;
117
+ using It = transform_width<binary_from_base64<std::string::const_iterator>, 8, 6>;
118
+
119
+ // Find the position of the last non-whitespace character
120
+ auto end = base64.find_last_not_of(" \t\n\r");
121
+ if (end != std::string::npos) {
122
+ // Move one past the last non-whitespace character
123
+ end += 1;
124
+ }
125
+
126
+ // Decode the base64 string and return the result
127
+ return std::string(It(base64.begin()), It(base64.begin() + end));
128
+ }
129
+
130
+ /**
131
+ * @brief Decodes a base64 string into an OpenCV image
132
+ *
133
+ * @param base64 The base64 encoded string
134
+ * @return The decoded OpenCV image
135
+ */
136
+ cv::Mat base64_to_image(const std::string& base64) {
137
+ // Decode the base64 string
138
+ std::string decodedStr = base64_decode(base64);
139
+
140
+ // Decode the image
141
+ std::vector<uchar> data(decodedStr.begin(), decodedStr.end());
142
+ cv::Mat img = cv::imdecode(data, cv::IMREAD_GRAYSCALE);
143
+
144
+ // Check for errors
145
+ if (img.empty()) {
146
+ throw std::runtime_error("Failed to decode image");
147
+ }
148
+
149
+ return img;
150
+ }
151
+
152
+ /**
153
+ * @brief Encodes an OpenCV image into a base64 string
154
+ *
155
+ * This function takes an OpenCV image and encodes it into a base64 string.
156
+ * The image is first encoded as a PNG image, and then the resulting
157
+ * bytes are encoded as a base64 string.
158
+ *
159
+ * @param img The OpenCV image
160
+ * @return The base64 encoded string
161
+ *
162
+ * @throws std::runtime_error if the image is empty or encoding fails
163
+ */
164
+ std::string image_to_base64(cv::Mat& img) {
165
+ if (img.empty()) {
166
+ throw std::runtime_error("Failed to read image");
167
+ }
168
+
169
+ // Encode the image as a PNG
170
+ std::vector<uchar> buf;
171
+ if (!cv::imencode(".png", img, buf)) {
172
+ throw std::runtime_error("Failed to encode image");
173
+ }
174
+
175
+ // Encode the bytes as a base64 string
176
+ using namespace boost::archive::iterators;
177
+ using It =
178
+ base64_from_binary<transform_width<std::vector<uchar>::const_iterator, 6, 8>>;
179
+ std::string base64(It(buf.begin()), It(buf.end()));
180
+
181
+ // Pad the string with '=' characters to a multiple of 4 bytes
182
+ base64.append((3 - buf.size() % 3) % 3, '=');
183
+
184
+ return base64;
185
+ }
186
+
187
+ /**
188
+ * @brief Callback function for libcurl to write data to a string
189
+ *
190
+ * This function is used as a callback for libcurl to write data to a string.
191
+ * It takes the contents, size, and nmemb as parameters, and writes the data to
192
+ * the string.
193
+ *
194
+ * @param contents The data to write
195
+ * @param size The size of the data
196
+ * @param nmemb The number of members in the data
197
+ * @param s The string to write the data to
198
+ * @return The number of bytes written
199
+ */
200
+ size_t WriteCallback(void* contents, size_t size, size_t nmemb, std::string* s) {
201
+ size_t newLength = size * nmemb;
202
+ try {
203
+ // Resize the string to fit the new data
204
+ s->resize(s->size() + newLength);
205
+ } catch (std::bad_alloc& e) {
206
+ // If there's an error allocating memory, return 0
207
+ return 0;
208
+ }
209
+
210
+ // Copy the data to the string
211
+ std::copy(static_cast<const char*>(contents),
212
+ static_cast<const char*>(contents) + newLength,
213
+ s->begin() + s->size() - newLength);
214
+ return newLength;
215
+ }
216
+
217
+ // Helper functions
218
+
219
+ /**
220
+ * @brief Helper function to convert a type to a Json::Value
221
+ *
222
+ * This function takes a value of type T and converts it to a Json::Value.
223
+ * It is used to simplify the process of converting a type to a Json::Value.
224
+ *
225
+ * @param val The value to convert
226
+ * @return The converted Json::Value
227
+ */
228
+ template <typename T> Json::Value toJson(const T& val) {
229
+ return Json::Value(val);
230
+ }
231
+
232
+ /**
233
+ * @brief Converts a vector to a Json::Value
234
+ *
235
+ * This function takes a vector of type T and converts it to a Json::Value.
236
+ * Each element in the vector is appended to the Json::Value array.
237
+ *
238
+ * @param vec The vector to convert to Json::Value
239
+ * @return The Json::Value representing the vector
240
+ */
241
+ template <typename T> Json::Value vectorToJson(const std::vector<T>& vec) {
242
+ Json::Value json(Json::arrayValue);
243
+ for (const auto& item : vec) {
244
+ json.append(item);
245
+ }
246
+ return json;
247
+ }
248
+
249
+ /**
250
+ * @brief Converts a nested vector to a Json::Value
251
+ *
252
+ * This function takes a nested vector of type T and converts it to a
253
+ * Json::Value. Each sub-vector is converted to a Json::Value array and appended
254
+ * to the main Json::Value array.
255
+ *
256
+ * @param vec The nested vector to convert to Json::Value
257
+ * @return The Json::Value representing the nested vector
258
+ */
259
+ template <typename T>
260
+ Json::Value nestedVectorToJson(const std::vector<std::vector<T>>& vec) {
261
+ Json::Value json(Json::arrayValue);
262
+ for (const auto& subVec : vec) {
263
+ json.append(vectorToJson(subVec));
264
+ }
265
+ return json;
266
+ }
267
+
268
+ /**
269
+ * @brief Converts the APIParams struct to a Json::Value
270
+ *
271
+ * This function takes an APIParams struct and converts it to a Json::Value.
272
+ * The Json::Value is a JSON object with the following fields:
273
+ * - data: a JSON array of base64 encoded images
274
+ * - max_keypoints: a JSON array of integers, max number of keypoints for each
275
+ * image
276
+ * - timestamps: a JSON array of timestamps, one for each image
277
+ * - grayscale: a JSON boolean, whether to convert images to grayscale
278
+ * - image_hw: a nested JSON array, each sub-array contains the height and width
279
+ * of an image
280
+ * - feature_type: a JSON integer, the type of feature detector to use
281
+ * - rotates: a JSON array of doubles, the rotation of each image
282
+ * - scales: a JSON array of doubles, the scale of each image
283
+ * - reference_points: a nested JSON array, each sub-array contains the
284
+ * reference points of an image
285
+ * - binarize: a JSON boolean, whether to binarize the descriptors
286
+ *
287
+ * @param params The APIParams struct to convert
288
+ * @return The Json::Value representing the APIParams struct
289
+ */
290
+ Json::Value paramsToJson(const APIParams& params) {
291
+ Json::Value json;
292
+ json["data"] = vectorToJson(params.data);
293
+ json["max_keypoints"] = vectorToJson(params.max_keypoints);
294
+ json["timestamps"] = vectorToJson(params.timestamps);
295
+ json["grayscale"] = toJson(params.grayscale);
296
+ json["image_hw"] = nestedVectorToJson(params.image_hw);
297
+ json["feature_type"] = toJson(params.feature_type);
298
+ json["rotates"] = vectorToJson(params.rotates);
299
+ json["scales"] = vectorToJson(params.scales);
300
+ json["reference_points"] = nestedVectorToJson(params.reference_points);
301
+ json["binarize"] = toJson(params.binarize);
302
+ return json;
303
+ }
304
+
305
+ template <typename T> cv::Mat jsonToMat(Json::Value json) {
306
+ int rows = json.size();
307
+ int cols = json[0].size();
308
+
309
+ // Create a single array to hold all the data.
310
+ std::vector<T> data;
311
+ data.reserve(rows * cols);
312
+
313
+ for (int i = 0; i < rows; i++) {
314
+ for (int j = 0; j < cols; j++) {
315
+ data.push_back(static_cast<T>(json[i][j].asInt()));
316
+ }
317
+ }
318
+
319
+ // Create a cv::Mat object that points to the data.
320
+ cv::Mat mat(rows, cols, CV_8UC1,
321
+ data.data()); // Change the type if necessary.
322
+ // cv::Mat mat(cols, rows,CV_8UC1, data.data()); // Change the type if
323
+ // necessary.
324
+
325
+ return mat.clone();  // deep-copy: 'data' is a local vector, so the wrapped buffer is freed on return
326
+ }
327
+
328
+ /**
329
+ * @brief Decodes the response of the server and prints the keypoints
330
+ *
331
+ * This function takes the response of the server, a JSON string, and decodes
332
+ * it. It then prints the keypoints and draws them on the original image.
333
+ *
334
+ * @param response The response of the server
335
+ * @return The keypoints and descriptors
336
+ */
337
+ KeyPointResults decode_response(const std::string& response, bool viz = true) {
338
+ Json::CharReaderBuilder builder;
339
+ Json::CharReader* reader = builder.newCharReader();
340
+
341
+ Json::Value jsonData;
342
+ std::string errors;
343
+
344
+ // Parse the JSON response
345
+ bool parsingSuccessful = reader->parse(
346
+ response.c_str(), response.c_str() + response.size(), &jsonData, &errors);
347
+ delete reader;
348
+
349
+ if (!parsingSuccessful) {
350
+ // Handle error
351
+ std::cout << "Failed to parse the JSON, errors:" << std::endl;
352
+ std::cout << errors << std::endl;
353
+ return KeyPointResults();
354
+ }
355
+
356
+ KeyPointResults kpts_results;
357
+
358
+ // Iterate over the images
359
+ for (const auto& jsonItem : jsonData) {
360
+ auto jkeypoints = jsonItem["keypoints"];
361
+ auto jkeypoints_orig = jsonItem["keypoints_orig"];
362
+ auto jdescriptors = jsonItem["descriptors"];
363
+ auto jscores = jsonItem["scores"];
364
+ auto jimageSize = jsonItem["image_size"];
365
+ auto joriginalSize = jsonItem["original_size"];
366
+ auto jsize = jsonItem["size"];
367
+
368
+ std::vector<cv::KeyPoint> vkeypoints;
369
+ std::vector<float> vscores;
370
+
371
+ // Iterate over the keypoints
372
+ int counter = 0;
373
+ for (const auto& keypoint : jkeypoints_orig) {
374
+ if (counter < 10) {
375
+ // Print the first 10 keypoints
376
+ std::cout << keypoint[0].asFloat() << ", " << keypoint[1].asFloat()
377
+ << std::endl;
378
+ }
379
+ counter++;
380
+ // Convert the Json::Value to a cv::KeyPoint
381
+ vkeypoints.emplace_back(
382
+ cv::KeyPoint(keypoint[0].asFloat(), keypoint[1].asFloat(), 0.0));
383
+ }
384
+
385
+ if (viz && jsonItem.isMember("image_orig")) {
386
+ auto jimg_orig = jsonItem["image_orig"];
387
+ cv::Mat img = jsonToMat<uchar>(jimg_orig);
388
+ cv::imwrite("viz_image_orig.jpg", img);
389
+
390
+ // Draw keypoints on the image
391
+ cv::Mat imgWithKeypoints;
392
+ cv::drawKeypoints(img, vkeypoints, imgWithKeypoints, cv::Scalar(0, 0, 255));
393
+
394
+ // Write the image with keypoints
395
+ std::string filename = "viz_image_orig_keypoints.jpg";
396
+ cv::imwrite(filename, imgWithKeypoints);
397
+ }
398
+
399
+ // Iterate over the descriptors
400
+ cv::Mat descriptors = jsonToMat<uchar>(jdescriptors);
401
+ kpts_results.append_keypoints(vkeypoints);
402
+ kpts_results.append_descriptors(descriptors);
403
+ }
404
+ return kpts_results;
405
+ }
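
As a cross-check on the Boost-based helpers above, here is a short Python sketch of the same PNG-encode → base64 → decode round-trip, using OpenCV and the standard library. It mirrors `image_to_base64`/`base64_to_image` from helper.h but is only an illustration of the expected lossless round-trip, not part of the repository's test suite.

```python
import base64

import cv2
import numpy as np


def image_to_base64(img: np.ndarray) -> str:
    """PNG-encode an image and return it as a base64 string."""
    ok, buf = cv2.imencode(".png", img)
    if not ok:
        raise RuntimeError("Failed to encode image")
    return base64.b64encode(buf.tobytes()).decode("utf-8")


def base64_to_image(encoding: str) -> np.ndarray:
    """Decode a base64 string back into a grayscale image."""
    data = np.frombuffer(base64.b64decode(encoding), dtype=np.uint8)
    img = cv2.imdecode(data, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise RuntimeError("Failed to decode image")
    return img


if __name__ == "__main__":
    original = np.random.randint(0, 256, size=(240, 320), dtype=np.uint8)
    restored = base64_to_image(image_to_base64(original))
    # PNG is lossless, so the round-trip must reproduce the array exactly.
    assert np.array_equal(original, restored), "round-trip mismatch"
    print("round-trip OK")
```

Because PNG is lossless, the decoded array must match the original byte-for-byte, which is the same invariant the C++ test checks with `cv::countNonZero`.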
imcui/ui/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
- __version__ = "1.0.1"
2
-
3
-
4
- def get_version():
5
- return __version__
 
1
+ __version__ = "1.3.0"
2
+
3
+
4
+ def get_version():
5
+ return __version__
imcui/ui/app_class.py CHANGED
@@ -1,820 +1,816 @@
1
- from pathlib import Path
2
- from typing import Any, Dict, Optional, Tuple
3
-
4
- import gradio as gr
5
- import numpy as np
6
- from easydict import EasyDict as edict
7
- from omegaconf import OmegaConf
8
-
9
- from .sfm import SfmEngine
10
- from .utils import (
11
- GRADIO_VERSION,
12
- gen_examples,
13
- generate_warp_images,
14
- get_matcher_zoo,
15
- load_config,
16
- ransac_zoo,
17
- run_matching,
18
- run_ransac,
19
- send_to_match,
20
- )
21
-
22
- DESCRIPTION = """
23
- # Image Matching WebUI
24
- This Space demonstrates [Image Matching WebUI](https://github.com/Vincentqyw/image-matching-webui) by vincent qin. Feel free to play with it, or duplicate to run image matching without a queue!
25
- <br/>
26
- 🔎 For more details about supported local features and matchers, please refer to https://github.com/Vincentqyw/image-matching-webui
27
-
28
- 🚀 All algorithms run on CPU for inference, causing slow speeds and high latency. For faster inference, please download the [source code](https://github.com/Vincentqyw/image-matching-webui) for local deployment.
29
-
30
- 🐛 Your feedback is valuable to me. Please do not hesitate to report any bugs [here](https://github.com/Vincentqyw/image-matching-webui/issues).
31
- """
32
-
33
- CSS = """
34
- #warning {background-color: #FFCCCB}
35
- .logs_class textarea {font-size: 12px !important}
36
- """
37
-
38
-
39
- class ImageMatchingApp:
40
- def __init__(self, server_name="0.0.0.0", server_port=7860, **kwargs):
41
- self.server_name = server_name
42
- self.server_port = server_port
43
- self.config_path = kwargs.get("config", Path(__file__).parent / "config.yaml")
44
- self.cfg = load_config(self.config_path)
45
- self.matcher_zoo = get_matcher_zoo(self.cfg["matcher_zoo"])
46
- self.app = None
47
- self.example_data_root = kwargs.get(
48
- "example_data_root", Path(__file__).parents[1] / "datasets"
49
- )
50
- # final step
51
- self.init_interface()
52
-
53
- def init_matcher_dropdown(self):
54
- algos = []
55
- for k, v in self.cfg["matcher_zoo"].items():
56
- if v.get("enable", True):
57
- algos.append(k)
58
- return algos
59
-
60
- def init_interface(self):
61
- with gr.Blocks(css=CSS) as self.app:
62
- with gr.Tab("Image Matching"):
63
- with gr.Row():
64
- with gr.Column(scale=1):
65
- gr.Image(
66
- str(Path(__file__).parent.parent / "assets/logo.webp"),
67
- elem_id="logo-img",
68
- show_label=False,
69
- show_share_button=False,
70
- show_download_button=False,
71
- )
72
- with gr.Column(scale=3):
73
- gr.Markdown(DESCRIPTION)
74
- with gr.Row(equal_height=False):
75
- with gr.Column():
76
- with gr.Row():
77
- matcher_list = gr.Dropdown(
78
- choices=self.init_matcher_dropdown(),
79
- value="disk+lightglue",
80
- label="Matching Model",
81
- interactive=True,
82
- )
83
- match_image_src = gr.Radio(
84
- (
85
- ["upload", "webcam", "clipboard"]
86
- if GRADIO_VERSION > "3"
87
- else ["upload", "webcam", "canvas"]
88
- ),
89
- label="Image Source",
90
- value="upload",
91
- )
92
- with gr.Row():
93
- input_image0 = gr.Image(
94
- label="Image 0",
95
- type="numpy",
96
- image_mode="RGB",
97
- height=300 if GRADIO_VERSION > "3" else None,
98
- interactive=True,
99
- )
100
- input_image1 = gr.Image(
101
- label="Image 1",
102
- type="numpy",
103
- image_mode="RGB",
104
- height=300 if GRADIO_VERSION > "3" else None,
105
- interactive=True,
106
- )
107
-
108
- with gr.Row():
109
- button_reset = gr.Button(value="Reset")
110
- button_run = gr.Button(value="Run Match", variant="primary")
111
- with gr.Row():
112
- button_stop = gr.Button(value="Force Stop", variant="stop")
113
-
114
- with gr.Accordion("Advanced Setting", open=False):
115
- with gr.Accordion("Image Setting", open=True):
116
- with gr.Row():
117
- image_force_resize_cb = gr.Checkbox(
118
- label="Force Resize",
119
- value=False,
120
- interactive=True,
121
- )
122
- image_setting_height = gr.Slider(
123
- minimum=48,
124
- maximum=2048,
125
- step=16,
126
- label="Image Height",
127
- value=480,
128
- visible=False,
129
- )
130
- image_setting_width = gr.Slider(
131
- minimum=64,
132
- maximum=2048,
133
- step=16,
134
- label="Image Width",
135
- value=640,
136
- visible=False,
137
- )
138
- with gr.Accordion("Matching Setting", open=True):
139
- with gr.Row():
140
- match_setting_threshold = gr.Slider(
141
- minimum=0.0,
142
- maximum=1,
143
- step=0.001,
144
- label="Match threshold",
145
- value=0.1,
146
- )
147
- match_setting_max_keypoints = gr.Slider(
148
- minimum=10,
149
- maximum=10000,
150
- step=10,
151
- label="Max features",
152
- value=1000,
153
- )
154
- # TODO: add line settings
155
- with gr.Row():
156
- detect_keypoints_threshold = gr.Slider(
157
- minimum=0,
158
- maximum=1,
159
- step=0.001,
160
- label="Keypoint threshold",
161
- value=0.015,
162
- )
163
- detect_line_threshold = ( # noqa: F841
164
- gr.Slider(
165
- minimum=0.1,
166
- maximum=1,
167
- step=0.01,
168
- label="Line threshold",
169
- value=0.2,
170
- )
171
- )
172
- # matcher_lists = gr.Radio(
173
- # ["NN-mutual", "Dual-Softmax"],
174
- # label="Matcher mode",
175
- # value="NN-mutual",
176
- # )
177
- with gr.Accordion("RANSAC Setting", open=True):
178
- with gr.Row(equal_height=False):
179
- ransac_method = gr.Dropdown(
180
- choices=ransac_zoo.keys(),
181
- value=self.cfg["defaults"]["ransac_method"],
182
- label="RANSAC Method",
183
- interactive=True,
184
- )
185
- ransac_reproj_threshold = gr.Slider(
186
- minimum=0.0,
187
- maximum=12,
188
- step=0.01,
189
- label="Ransac Reproj threshold",
190
- value=8.0,
191
- )
192
- ransac_confidence = gr.Slider(
193
- minimum=0.0,
194
- maximum=1,
195
- step=0.00001,
196
- label="Ransac Confidence",
197
- value=self.cfg["defaults"]["ransac_confidence"],
198
- )
199
- ransac_max_iter = gr.Slider(
200
- minimum=0.0,
201
- maximum=100000,
202
- step=100,
203
- label="Ransac Iterations",
204
- value=self.cfg["defaults"]["ransac_max_iter"],
205
- )
206
- button_ransac = gr.Button(
207
- value="Rerun RANSAC", variant="primary"
208
- )
209
- with gr.Accordion("Geometry Setting", open=False):
210
- with gr.Row(equal_height=False):
211
- choice_geometry_type = gr.Radio(
212
- ["Fundamental", "Homography"],
213
- label="Reconstruct Geometry",
214
- value=self.cfg["defaults"]["setting_geometry"],
215
- )
216
- # image resize
217
- image_force_resize_cb.select(
218
- fn=self._on_select_force_resize,
219
- inputs=image_force_resize_cb,
220
- outputs=[image_setting_width, image_setting_height],
221
- )
222
- # collect inputs
223
- state_cache = gr.State({})
224
- inputs = [
225
- input_image0,
226
- input_image1,
227
- match_setting_threshold,
228
- match_setting_max_keypoints,
229
- detect_keypoints_threshold,
230
- matcher_list,
231
- ransac_method,
232
- ransac_reproj_threshold,
233
- ransac_confidence,
234
- ransac_max_iter,
235
- choice_geometry_type,
236
- gr.State(self.matcher_zoo),
237
- image_force_resize_cb,
238
- image_setting_width,
239
- image_setting_height,
240
- ]
241
-
242
- # Add some examples
243
- with gr.Row():
244
- # Example inputs
245
- with gr.Accordion("Open for More: Examples", open=True):
246
- gr.Examples(
247
- examples=gen_examples(self.example_data_root),
248
- inputs=inputs,
249
- outputs=[],
250
- fn=run_matching,
251
- cache_examples=False,
252
- label=(
253
- "Examples (click one of the images below to Run"
254
- " Match). Thx: WxBS"
255
- ),
256
- )
257
- with gr.Accordion("Supported Algorithms", open=False):
258
- # add a table of supported algorithms
259
- self.display_supported_algorithms()
260
-
261
- with gr.Column():
262
- with gr.Accordion("Open for More: Keypoints", open=True):
263
- output_keypoints = gr.Image(label="Keypoints", type="numpy")
264
- with gr.Accordion(
265
- (
266
- "Open for More: Raw Matches"
267
- " (Green for good matches, Red for bad)"
268
- ),
269
- open=False,
270
- ):
271
- output_matches_raw = gr.Image(
272
- label="Raw Matches",
273
- type="numpy",
274
- )
275
- with gr.Accordion(
276
- (
277
- "Open for More: Ransac Matches"
278
- " (Green for good matches, Red for bad)"
279
- ),
280
- open=True,
281
- ):
282
- output_matches_ransac = gr.Image(
283
- label="Ransac Matches", type="numpy"
284
- )
285
- with gr.Accordion(
286
- "Open for More: Matches Statistics", open=False
287
- ):
288
- output_pred = gr.File(label="Outputs", elem_id="download")
289
- matches_result_info = gr.JSON(label="Matches Statistics")
290
- matcher_info = gr.JSON(label="Match info")
291
-
292
- with gr.Accordion("Open for More: Warped Image", open=True):
293
- output_wrapped = gr.Image(
294
- label="Wrapped Pair", type="numpy"
295
- )
296
- # send to input
297
- button_rerun = gr.Button(
298
- value="Send to Input Match Pair",
299
- variant="primary",
300
- )
301
- with gr.Accordion(
302
- "Open for More: Geometry info", open=False
303
- ):
304
- geometry_result = gr.JSON(
305
- label="Reconstructed Geometry"
306
- )
307
-
308
- # callbacks
309
- match_image_src.change(
310
- fn=self.ui_change_imagebox,
311
- inputs=match_image_src,
312
- outputs=input_image0,
313
- )
314
- match_image_src.change(
315
- fn=self.ui_change_imagebox,
316
- inputs=match_image_src,
317
- outputs=input_image1,
318
- )
319
- # collect outputs
320
- outputs = [
321
- output_keypoints,
322
- output_matches_raw,
323
- output_matches_ransac,
324
- matches_result_info,
325
- matcher_info,
326
- geometry_result,
327
- output_wrapped,
328
- state_cache,
329
- output_pred,
330
- ]
331
- # button callbacks
332
- click_event = button_run.click(
333
- fn=run_matching, inputs=inputs, outputs=outputs
334
- )
335
- # stop button
336
- button_stop.click(
337
- fn=None, inputs=None, outputs=None, cancels=[click_event]
338
- )
339
-
340
- # Reset images
341
- reset_outputs = [
342
- input_image0,
343
- input_image1,
344
- match_setting_threshold,
345
- match_setting_max_keypoints,
346
- detect_keypoints_threshold,
347
- matcher_list,
348
- input_image0,
349
- input_image1,
350
- match_image_src,
351
- output_keypoints,
352
- output_matches_raw,
353
- output_matches_ransac,
354
- matches_result_info,
355
- matcher_info,
356
- output_wrapped,
357
- geometry_result,
358
- ransac_method,
359
- ransac_reproj_threshold,
360
- ransac_confidence,
361
- ransac_max_iter,
362
- choice_geometry_type,
363
- output_pred,
364
- image_force_resize_cb,
365
- ]
366
- button_reset.click(
367
- fn=self.ui_reset_state,
368
- inputs=None,
369
- outputs=reset_outputs,
370
- )
371
-
372
- # run ransac button action
373
- button_ransac.click(
374
- fn=run_ransac,
375
- inputs=[
376
- state_cache,
377
- choice_geometry_type,
378
- ransac_method,
379
- ransac_reproj_threshold,
380
- ransac_confidence,
381
- ransac_max_iter,
382
- ],
383
- outputs=[
384
- output_matches_ransac,
385
- matches_result_info,
386
- output_wrapped,
387
- output_pred,
388
- ],
389
- )
390
-
391
- # send warped image to match
392
- button_rerun.click(
393
- fn=send_to_match,
394
- inputs=[state_cache],
395
- outputs=[input_image0, input_image1],
396
- )
397
-
398
- # estimate geo
399
- choice_geometry_type.change(
400
- fn=generate_warp_images,
401
- inputs=[
402
- input_image0,
403
- input_image1,
404
- geometry_result,
405
- choice_geometry_type,
406
- ],
407
- outputs=[output_wrapped, geometry_result],
408
- )
409
- with gr.Tab("Structure from Motion(under-dev)"):
410
- sfm_ui = AppSfmUI( # noqa: F841
411
- {
412
- **self.cfg,
413
- "matcher_zoo": self.matcher_zoo,
414
- "outputs": "experiments/sfm",
415
- }
416
- )
417
- sfm_ui.call_empty()
418
-
419
- def run(self):
420
- self.app.queue().launch(
421
- server_name=self.server_name,
422
- server_port=self.server_port,
423
- share=False,
424
- allowed_paths=[
425
- str(Path(__file__).parents[0]),
426
- str(Path(__file__).parents[1]),
427
- ],
428
- )
429
-
430
- def ui_change_imagebox(self, choice):
431
- """
432
- Updates the image box with the given choice.
433
-
434
- Args:
435
- choice (list): The list of image sources to be displayed in the image box.
436
-
437
- Returns:
438
- dict: A dictionary containing the updated value, sources, and type for the image box.
439
- """
440
- ret_dict = {
441
- "value": None, # The updated value of the image box
442
- "__type__": "update", # The type of update for the image box
443
- }
444
- if GRADIO_VERSION > "3":
445
- return {
446
- **ret_dict,
447
- "sources": choice, # The list of image sources to be displayed
448
- }
449
- else:
450
- return {
451
- **ret_dict,
452
- "source": choice, # The list of image sources to be displayed
453
- }
454
-
455
- def _on_select_force_resize(self, visible: bool = False):
456
- return gr.update(visible=visible), gr.update(visible=visible)
457
-
458
- def ui_reset_state(
459
- self,
460
- *args: Any,
461
- ) -> Tuple[
462
- Optional[np.ndarray],
463
- Optional[np.ndarray],
464
- float,
465
- int,
466
- float,
467
- str,
468
- Dict[str, Any],
469
- Dict[str, Any],
470
- str,
471
- Optional[np.ndarray],
472
- Optional[np.ndarray],
473
- Optional[np.ndarray],
474
- Dict[str, Any],
475
- Dict[str, Any],
476
- Optional[np.ndarray],
477
- Dict[str, Any],
478
- str,
479
- int,
480
- float,
481
- int,
482
- bool,
483
- ]:
484
- """
485
- Reset the state of the UI.
486
-
487
- Returns:
488
- tuple: A tuple containing the initial values for the UI state.
489
- """
490
- key: str = list(self.matcher_zoo.keys())[
491
- 0
492
- ] # Get the first key from matcher_zoo
493
- # flush_logs()
494
- return (
495
- None, # image0: Optional[np.ndarray]
496
- None, # image1: Optional[np.ndarray]
497
- self.cfg["defaults"]["match_threshold"], # matching_threshold: float
498
- self.cfg["defaults"]["max_keypoints"], # max_keypoints: int
499
- self.cfg["defaults"]["keypoint_threshold"], # keypoint_threshold: float
500
- key, # matcher: str
501
- self.ui_change_imagebox("upload"), # input image0: Dict[str, Any]
502
- self.ui_change_imagebox("upload"), # input image1: Dict[str, Any]
503
- "upload", # match_image_src: str
504
- None, # keypoints: Optional[np.ndarray]
505
- None, # raw matches: Optional[np.ndarray]
506
- None, # ransac matches: Optional[np.ndarray]
507
- {}, # matches result info: Dict[str, Any]
508
- {}, # matcher config: Dict[str, Any]
509
- None, # warped image: Optional[np.ndarray]
510
- {}, # geometry result: Dict[str, Any]
511
- self.cfg["defaults"]["ransac_method"], # ransac_method: str
512
- self.cfg["defaults"][
513
- "ransac_reproj_threshold"
514
- ], # ransac_reproj_threshold: float
515
- self.cfg["defaults"]["ransac_confidence"], # ransac_confidence: float
516
- self.cfg["defaults"]["ransac_max_iter"], # ransac_max_iter: int
517
- self.cfg["defaults"]["setting_geometry"], # geometry: str
518
- None, # predictions
519
- False,
520
- )
521
-
522
- def display_supported_algorithms(self, style="tab"):
523
- def get_link(link, tag="Link"):
524
- return "[{}]({})".format(tag, link) if link is not None else "None"
525
-
526
- data = []
527
- cfg = self.cfg["matcher_zoo"]
528
- if style == "md":
529
- markdown_table = "| Algo. | Conference | Code | Project | Paper |\n"
530
- markdown_table += "| ----- | ---------- | ---- | ------- | ----- |\n"
531
-
532
- for _, v in cfg.items():
533
- if not v["info"].get("display", True):
534
- continue
535
- github_link = get_link(v["info"].get("github", ""))
536
- project_link = get_link(v["info"].get("project", ""))
537
- paper_link = get_link(
538
- v["info"]["paper"],
539
- (
540
- Path(v["info"]["paper"]).name[-10:]
541
- if v["info"]["paper"] is not None
542
- else "Link"
543
- ),
544
- )
545
-
546
- markdown_table += "{}|{}|{}|{}|{}\n".format(
547
- v["info"].get("name", ""),
548
- v["info"].get("source", ""),
549
- github_link,
550
- project_link,
551
- paper_link,
552
- )
553
- return gr.Markdown(markdown_table)
554
- elif style == "tab":
555
- for k, v in cfg.items():
556
- if not v["info"].get("display", True):
557
- continue
558
- data.append(
559
- [
560
- v["info"].get("name", ""),
561
- v["info"].get("source", ""),
562
- v["info"].get("github", ""),
563
- v["info"].get("paper", ""),
564
- v["info"].get("project", ""),
565
- ]
566
- )
567
- tab = gr.Dataframe(
568
- headers=["Algo.", "Conference", "Code", "Paper", "Project"],
569
- datatype=["str", "str", "str", "str", "str"],
570
- col_count=(5, "fixed"),
571
- value=data,
572
- # wrap=True,
573
- # min_width = 1000,
574
- # height=1000,
575
- )
576
- return tab
577
-
578
-
579
- class AppBaseUI:
580
- def __init__(self, cfg: Dict[str, Any] = {}):
581
- self.cfg = OmegaConf.create(cfg)
582
- self.inputs = edict({})
583
- self.outputs = edict({})
584
- self.ui = edict({})
585
-
586
- def _init_ui(self):
587
- NotImplemented
588
-
589
- def call(self, **kwargs):
590
- NotImplemented
591
-
592
- def info(self):
593
- gr.Info("SFM is under construction.")
594
-
595
-
596
- class AppSfmUI(AppBaseUI):
597
- def __init__(self, cfg: Dict[str, Any] = None):
598
- super().__init__(cfg)
599
- assert "matcher_zoo" in self.cfg
600
- self.matcher_zoo = self.cfg["matcher_zoo"]
601
- self.sfm_engine = SfmEngine(cfg)
602
- self._init_ui()
603
-
604
- def init_retrieval_dropdown(self):
605
- algos = []
606
- for k, v in self.cfg["retrieval_zoo"].items():
607
- if v.get("enable", True):
608
- algos.append(k)
609
- return algos
610
-
611
- def _update_options(self, option):
612
- if option == "sparse":
613
- return gr.Textbox("sparse", visible=True)
614
- elif option == "dense":
615
- return gr.Textbox("dense", visible=True)
616
- else:
617
- return gr.Textbox("not set", visible=True)
618
-
619
- def _on_select_custom_params(self, value: bool = False):
620
- return gr.update(visible=value)
621
-
622
- def _init_ui(self):
623
- with gr.Row():
624
- # data settting and camera settings
625
- with gr.Column():
626
- self.inputs.input_images = gr.File(
627
- label="SfM",
628
- interactive=True,
629
- file_count="multiple",
630
- min_width=300,
631
- )
632
- # camera setting
633
- with gr.Accordion("Camera Settings", open=True):
634
- with gr.Column():
635
- with gr.Row():
636
- with gr.Column():
637
- self.inputs.camera_model = gr.Dropdown(
638
- choices=[
639
- "PINHOLE",
640
- "SIMPLE_RADIAL",
641
- "OPENCV",
642
- ],
643
- value="PINHOLE",
644
- label="Camera Model",
645
- interactive=True,
646
- )
647
- with gr.Column():
648
- gr.Checkbox(
649
- label="Shared Params",
650
- value=True,
651
- interactive=True,
652
- )
653
- camera_custom_params_cb = gr.Checkbox(
654
- label="Custom Params",
655
- value=False,
656
- interactive=True,
657
- )
658
- with gr.Row():
659
- self.inputs.camera_params = gr.Textbox(
660
- label="Camera Params",
661
- value="0,0,0,0",
662
- interactive=False,
663
- visible=False,
664
- )
665
- camera_custom_params_cb.select(
666
- fn=self._on_select_custom_params,
667
- inputs=camera_custom_params_cb,
668
- outputs=self.inputs.camera_params,
669
- )
670
-
671
- with gr.Accordion("Matching Settings", open=True):
672
- # feature extraction and matching setting
673
- with gr.Row():
674
- # matcher setting
675
- self.inputs.matcher_key = gr.Dropdown(
676
- choices=self.matcher_zoo.keys(),
677
- value="disk+lightglue",
678
- label="Matching Model",
679
- interactive=True,
680
- )
681
- with gr.Row():
682
- with gr.Accordion("Advanced Settings", open=False):
683
- with gr.Column():
684
- with gr.Row():
685
- # matching setting
686
- self.inputs.max_keypoints = gr.Slider(
687
- label="Max Keypoints",
688
- minimum=100,
689
- maximum=10000,
690
- value=1000,
691
- interactive=True,
692
- )
693
- self.inputs.keypoint_threshold = gr.Slider(
694
- label="Keypoint Threshold",
695
- minimum=0,
696
- maximum=1,
697
- value=0.01,
698
- )
699
- with gr.Row():
700
- self.inputs.match_threshold = gr.Slider(
701
- label="Match Threshold",
702
- minimum=0.01,
703
- maximum=12.0,
704
- value=0.2,
705
- )
706
- self.inputs.ransac_threshold = gr.Slider(
707
- label="Ransac Threshold",
708
- minimum=0.01,
709
- maximum=12.0,
710
- value=4.0,
711
- step=0.01,
712
- interactive=True,
713
- )
714
-
715
- with gr.Row():
716
- self.inputs.ransac_confidence = gr.Slider(
717
- label="Ransac Confidence",
718
- minimum=0.01,
719
- maximum=1.0,
720
- value=0.9999,
721
- step=0.0001,
722
- interactive=True,
723
- )
724
- self.inputs.ransac_max_iter = gr.Slider(
725
- label="Ransac Max Iter",
726
- minimum=1,
727
- maximum=100,
728
- value=100,
729
- step=1,
730
- interactive=True,
731
- )
732
- with gr.Accordion("Scene Graph Settings", open=True):
733
- # mapping setting
734
- self.inputs.scene_graph = gr.Dropdown(
735
- choices=["all", "swin", "oneref"],
736
- value="all",
737
- label="Scene Graph",
738
- interactive=True,
739
- )
740
-
741
- # global feature setting
742
- self.inputs.global_feature = gr.Dropdown(
743
- choices=self.init_retrieval_dropdown(),
744
- value="netvlad",
745
- label="Global features",
746
- interactive=True,
747
- )
748
- self.inputs.top_k = gr.Slider(
749
- label="Number of Images per Image to Match",
750
- minimum=1,
751
- maximum=100,
752
- value=10,
753
- step=1,
754
- )
755
- # button_match = gr.Button("Run Matching", variant="primary")
756
-
757
- # mapping setting
758
- with gr.Column():
759
- with gr.Accordion("Mapping Settings", open=True):
760
- with gr.Row():
761
- with gr.Accordion("Buddle Settings", open=True):
762
- with gr.Row():
763
- self.inputs.mapper_refine_focal_length = gr.Checkbox(
764
- label="Refine Focal Length",
765
- value=False,
766
- interactive=True,
767
- )
768
- self.inputs.mapper_refine_principle_points = (
769
- gr.Checkbox(
770
- label="Refine Principle Points",
771
- value=False,
772
- interactive=True,
773
- )
774
- )
775
- self.inputs.mapper_refine_extra_params = gr.Checkbox(
776
- label="Refine Extra Params",
777
- value=False,
778
- interactive=True,
779
- )
780
- with gr.Accordion("Retriangluation Settings", open=True):
781
- gr.Textbox(
782
- label="Retriangluation Details",
783
- )
784
- self.ui.button_sfm = gr.Button("Run SFM", variant="primary")
785
- self.outputs.model_3d = gr.Model3D(
786
- interactive=True,
787
- )
788
- self.outputs.output_image = gr.Image(
789
- label="SFM Visualize",
790
- type="numpy",
791
- image_mode="RGB",
792
- interactive=False,
793
- )
794
-
795
- def call_empty(self):
796
- self.ui.button_sfm.click(fn=self.info, inputs=[], outputs=[])
797
-
798
- def call(self):
799
- self.ui.button_sfm.click(
800
- fn=self.sfm_engine.call,
801
- inputs=[
802
- self.inputs.matcher_key,
803
- self.inputs.input_images, # images
804
- self.inputs.camera_model,
805
- self.inputs.camera_params,
806
- self.inputs.max_keypoints,
807
- self.inputs.keypoint_threshold,
808
- self.inputs.match_threshold,
809
- self.inputs.ransac_threshold,
810
- self.inputs.ransac_confidence,
811
- self.inputs.ransac_max_iter,
812
- self.inputs.scene_graph,
813
- self.inputs.global_feature,
814
- self.inputs.top_k,
815
- self.inputs.mapper_refine_focal_length,
816
- self.inputs.mapper_refine_principle_points,
817
- self.inputs.mapper_refine_extra_params,
818
- ],
819
- outputs=[self.outputs.model_3d, self.outputs.output_image],
820
- )
 
1
+ from pathlib import Path
2
+ from typing import Any, Dict, Optional, Tuple
3
+
4
+ import gradio as gr
5
+ import numpy as np
6
+ from easydict import EasyDict as edict
7
+ from omegaconf import OmegaConf
8
+
9
+ from .sfm import SfmEngine
10
+ from .utils import (
11
+ GRADIO_VERSION,
12
+ gen_examples,
13
+ generate_warp_images,
14
+ get_matcher_zoo,
15
+ load_config,
16
+ ransac_zoo,
17
+ run_matching,
18
+ run_ransac,
19
+ send_to_match,
20
+ )
21
+
22
+ DESCRIPTION = """
23
+ # Image Matching WebUI
24
+ This Space demonstrates [Image Matching WebUI](https://github.com/Vincentqyw/image-matching-webui) by vincent qin. Feel free to play with it, or duplicate to run image matching without a queue!
25
+ <br/>
26
+ 🔎 For more details about supported local features and matchers, please refer to https://github.com/Vincentqyw/image-matching-webui
27
+
28
+ 🚀 All algorithms run on CPU for inference, causing slow speeds and high latency. For faster inference, please download the [source code](https://github.com/Vincentqyw/image-matching-webui) for local deployment.
29
+
30
+ 🐛 Your feedback is valuable to me. Please do not hesitate to report any bugs [here](https://github.com/Vincentqyw/image-matching-webui/issues).
31
+ """
32
+
33
+ CSS = """
34
+ #warning {background-color: #FFCCCB}
35
+ .logs_class textarea {font-size: 12px !important}
36
+ """
37
+
38
+
39
+ class ImageMatchingApp:
40
+ def __init__(self, server_name="0.0.0.0", server_port=7860, **kwargs):
41
+ self.server_name = server_name
42
+ self.server_port = server_port
43
+ self.config_path = kwargs.get("config", Path(__file__).parent / "config.yaml")
44
+ self.cfg = load_config(self.config_path)
45
+ self.matcher_zoo = get_matcher_zoo(self.cfg["matcher_zoo"])
46
+ self.app = None
47
+ self.example_data_root = kwargs.get(
48
+ "example_data_root", Path(__file__).parents[1] / "datasets"
49
+ )
50
+ # final step
51
+ self.init_interface()
52
+
53
+ def init_matcher_dropdown(self):
54
+ algos = []
55
+ for k, v in self.cfg["matcher_zoo"].items():
56
+ if v.get("enable", True):
57
+ algos.append(k)
58
+ return algos
59
+
60
+ def init_interface(self):
61
+ with gr.Blocks(css=CSS) as self.app:
62
+ with gr.Tab("Image Matching"):
63
+ with gr.Row():
64
+ with gr.Column(scale=1):
65
+ gr.Image(
66
+ str(Path(__file__).parent.parent / "assets/logo.webp"),
67
+ elem_id="logo-img",
68
+ show_label=False,
69
+ show_share_button=False,
70
+ show_download_button=False,
71
+ )
72
+ with gr.Column(scale=3):
73
+ gr.Markdown(DESCRIPTION)
74
+ with gr.Row(equal_height=False):
75
+ with gr.Column():
76
+ with gr.Row():
77
+ matcher_list = gr.Dropdown(
78
+ choices=self.init_matcher_dropdown(),
79
+ value="disk+lightglue",
80
+ label="Matching Model",
81
+ interactive=True,
82
+ )
83
+ match_image_src = gr.Radio(
84
+ (
85
+ ["upload", "webcam", "clipboard"]
86
+ if GRADIO_VERSION > "3"
87
+ else ["upload", "webcam", "canvas"]
88
+ ),
89
+ label="Image Source",
90
+ value="upload",
91
+ )
92
+ with gr.Row():
93
+ input_image0 = gr.Image(
94
+ label="Image 0",
95
+ type="numpy",
96
+ image_mode="RGB",
97
+ height=300 if GRADIO_VERSION > "3" else None,
98
+ interactive=True,
99
+ )
100
+ input_image1 = gr.Image(
101
+ label="Image 1",
102
+ type="numpy",
103
+ image_mode="RGB",
104
+ height=300 if GRADIO_VERSION > "3" else None,
105
+ interactive=True,
106
+ )
107
+
108
+ with gr.Row():
109
+ button_reset = gr.Button(value="Reset")
110
+ button_run = gr.Button(value="Run Match", variant="primary")
111
+ with gr.Row():
112
+ button_stop = gr.Button(value="Force Stop", variant="stop")
113
+
114
+ with gr.Accordion("Advanced Setting", open=False):
115
+ with gr.Accordion("Image Setting", open=True):
116
+ with gr.Row():
117
+ image_force_resize_cb = gr.Checkbox(
118
+ label="Force Resize",
119
+ value=False,
120
+ interactive=True,
121
+ )
122
+ image_setting_height = gr.Slider(
123
+ minimum=48,
124
+ maximum=2048,
125
+ step=16,
126
+ label="Image Height",
127
+ value=480,
128
+ visible=False,
129
+ )
130
+ image_setting_width = gr.Slider(
131
+ minimum=64,
132
+ maximum=2048,
133
+ step=16,
134
+ label="Image Width",
135
+ value=640,
136
+ visible=False,
137
+ )
138
+ with gr.Accordion("Matching Setting", open=True):
139
+ with gr.Row():
140
+ match_setting_threshold = gr.Slider(
141
+ minimum=0.0,
142
+ maximum=1,
143
+ step=0.001,
144
+ label="Match threshold",
145
+ value=0.1,
146
+ )
147
+ match_setting_max_keypoints = gr.Slider(
148
+ minimum=10,
149
+ maximum=10000,
150
+ step=10,
151
+ label="Max features",
152
+ value=1000,
153
+ )
154
+ # TODO: add line settings
155
+ with gr.Row():
156
+ detect_keypoints_threshold = gr.Slider(
157
+ minimum=0,
158
+ maximum=1,
159
+ step=0.001,
160
+ label="Keypoint threshold",
161
+ value=0.015,
162
+ )
163
+ detect_line_threshold = ( # noqa: F841
164
+ gr.Slider(
165
+ minimum=0.1,
166
+ maximum=1,
167
+ step=0.01,
168
+ label="Line threshold",
169
+ value=0.2,
170
+ )
171
+ )
172
+
173
+ with gr.Accordion("RANSAC Setting", open=True):
174
+ with gr.Row(equal_height=False):
175
+ ransac_method = gr.Dropdown(
176
+ choices=ransac_zoo.keys(),
177
+ value=self.cfg["defaults"]["ransac_method"],
178
+ label="RANSAC Method",
179
+ interactive=True,
180
+ )
181
+ ransac_reproj_threshold = gr.Slider(
182
+ minimum=0.0,
183
+ maximum=12,
184
+ step=0.01,
185
+ label="Ransac Reproj threshold",
186
+ value=8.0,
187
+ )
188
+ ransac_confidence = gr.Slider(
189
+ minimum=0.0,
190
+ maximum=1,
191
+ step=0.00001,
192
+ label="Ransac Confidence",
193
+ value=self.cfg["defaults"]["ransac_confidence"],
194
+ )
195
+ ransac_max_iter = gr.Slider(
196
+ minimum=0.0,
197
+ maximum=100000,
198
+ step=100,
199
+ label="Ransac Iterations",
200
+ value=self.cfg["defaults"]["ransac_max_iter"],
201
+ )
202
+ button_ransac = gr.Button(
203
+ value="Rerun RANSAC", variant="primary"
204
+ )
205
+ with gr.Accordion("Geometry Setting", open=False):
206
+ with gr.Row(equal_height=False):
207
+ choice_geometry_type = gr.Radio(
208
+ ["Fundamental", "Homography"],
209
+ label="Reconstruct Geometry",
210
+ value=self.cfg["defaults"]["setting_geometry"],
211
+ )
212
+ # image resize
213
+ image_force_resize_cb.select(
214
+ fn=self._on_select_force_resize,
215
+ inputs=image_force_resize_cb,
216
+ outputs=[image_setting_width, image_setting_height],
217
+ )
218
+ # collect inputs
219
+ state_cache = gr.State({})
220
+ inputs = [
221
+ input_image0,
222
+ input_image1,
223
+ match_setting_threshold,
224
+ match_setting_max_keypoints,
225
+ detect_keypoints_threshold,
226
+ matcher_list,
227
+ ransac_method,
228
+ ransac_reproj_threshold,
229
+ ransac_confidence,
230
+ ransac_max_iter,
231
+ choice_geometry_type,
232
+ gr.State(self.matcher_zoo),
233
+ image_force_resize_cb,
234
+ image_setting_width,
235
+ image_setting_height,
236
+ ]
237
+
238
+ # Add some examples
239
+ with gr.Row():
240
+ # Example inputs
241
+ with gr.Accordion("Open for More: Examples", open=True):
242
+ gr.Examples(
243
+ examples=gen_examples(self.example_data_root),
244
+ inputs=inputs,
245
+ outputs=[],
246
+ fn=run_matching,
247
+ cache_examples=False,
248
+ label=(
249
+ "Examples (click one of the images below to Run"
250
+ " Match). Thx: WxBS"
251
+ ),
252
+ )
253
+ with gr.Accordion("Supported Algorithms", open=False):
254
+ # add a table of supported algorithms
255
+ self.display_supported_algorithms()
256
+
257
+ with gr.Column():
258
+ with gr.Accordion("Open for More: Keypoints", open=True):
259
+ output_keypoints = gr.Image(label="Keypoints", type="numpy")
260
+ with gr.Accordion(
261
+ (
262
+ "Open for More: Raw Matches"
263
+ " (Green for good matches, Red for bad)"
264
+ ),
265
+ open=False,
266
+ ):
267
+ output_matches_raw = gr.Image(
268
+ label="Raw Matches",
269
+ type="numpy",
270
+ )
271
+ with gr.Accordion(
272
+ (
273
+ "Open for More: Ransac Matches"
274
+ " (Green for good matches, Red for bad)"
275
+ ),
276
+ open=True,
277
+ ):
278
+ output_matches_ransac = gr.Image(
279
+ label="Ransac Matches", type="numpy"
280
+ )
281
+ with gr.Accordion(
282
+ "Open for More: Matches Statistics", open=False
283
+ ):
284
+ output_pred = gr.File(label="Outputs", elem_id="download")
285
+ matches_result_info = gr.JSON(label="Matches Statistics")
286
+ matcher_info = gr.JSON(label="Match info")
287
+
288
+ with gr.Accordion("Open for More: Warped Image", open=True):
289
+ output_wrapped = gr.Image(
290
+ label="Warped Pair", type="numpy"
291
+ )
292
+ # send to input
293
+ button_rerun = gr.Button(
294
+ value="Send to Input Match Pair",
295
+ variant="primary",
296
+ )
297
+ with gr.Accordion(
298
+ "Open for More: Geometry info", open=False
299
+ ):
300
+ geometry_result = gr.JSON(
301
+ label="Reconstructed Geometry"
302
+ )
303
+
304
+ # callbacks
305
+ match_image_src.change(
306
+ fn=self.ui_change_imagebox,
307
+ inputs=match_image_src,
308
+ outputs=input_image0,
309
+ )
310
+ match_image_src.change(
311
+ fn=self.ui_change_imagebox,
312
+ inputs=match_image_src,
313
+ outputs=input_image1,
314
+ )
315
+ # collect outputs
316
+ outputs = [
317
+ output_keypoints,
318
+ output_matches_raw,
319
+ output_matches_ransac,
320
+ matches_result_info,
321
+ matcher_info,
322
+ geometry_result,
323
+ output_wrapped,
324
+ state_cache,
325
+ output_pred,
326
+ ]
327
+ # button callbacks
328
+ click_event = button_run.click(
329
+ fn=run_matching, inputs=inputs, outputs=outputs
330
+ )
331
+ # stop button
332
+ button_stop.click(
333
+ fn=None, inputs=None, outputs=None, cancels=[click_event]
334
+ )
335
+
336
+ # Reset images
337
+ reset_outputs = [
338
+ input_image0,
339
+ input_image1,
340
+ match_setting_threshold,
341
+ match_setting_max_keypoints,
342
+ detect_keypoints_threshold,
343
+ matcher_list,
344
+ input_image0,
345
+ input_image1,
346
+ match_image_src,
347
+ output_keypoints,
348
+ output_matches_raw,
349
+ output_matches_ransac,
350
+ matches_result_info,
351
+ matcher_info,
352
+ output_wrapped,
353
+ geometry_result,
354
+ ransac_method,
355
+ ransac_reproj_threshold,
356
+ ransac_confidence,
357
+ ransac_max_iter,
358
+ choice_geometry_type,
359
+ output_pred,
360
+ image_force_resize_cb,
361
+ ]
362
+ button_reset.click(
363
+ fn=self.ui_reset_state,
364
+ inputs=None,
365
+ outputs=reset_outputs,
366
+ )
367
+
368
+ # run ransac button action
369
+ button_ransac.click(
370
+ fn=run_ransac,
371
+ inputs=[
372
+ state_cache,
373
+ choice_geometry_type,
374
+ ransac_method,
375
+ ransac_reproj_threshold,
376
+ ransac_confidence,
377
+ ransac_max_iter,
378
+ ],
379
+ outputs=[
380
+ output_matches_ransac,
381
+ matches_result_info,
382
+ output_wrapped,
383
+ output_pred,
384
+ ],
385
+ )
386
+
387
+ # send warped image to match
388
+ button_rerun.click(
389
+ fn=send_to_match,
390
+ inputs=[state_cache],
391
+ outputs=[input_image0, input_image1],
392
+ )
393
+
394
+ # estimate geo
395
+ choice_geometry_type.change(
396
+ fn=generate_warp_images,
397
+ inputs=[
398
+ input_image0,
399
+ input_image1,
400
+ geometry_result,
401
+ choice_geometry_type,
402
+ ],
403
+ outputs=[output_wrapped, geometry_result],
404
+ )
405
+ with gr.Tab("Structure from Motion (under-dev)"):
406
+ sfm_ui = AppSfmUI( # noqa: F841
407
+ {
408
+ **self.cfg,
409
+ "matcher_zoo": self.matcher_zoo,
410
+ "outputs": "experiments/sfm",
411
+ }
412
+ )
413
+ sfm_ui.call_empty()
414
+
415
+ def run(self):
416
+ self.app.queue().launch(
417
+ server_name=self.server_name,
418
+ server_port=self.server_port,
419
+ share=False,
420
+ allowed_paths=[
421
+ str(Path(__file__).parents[0]),
422
+ str(Path(__file__).parents[1]),
423
+ ],
424
+ )
425
+
426
+ def ui_change_imagebox(self, choice):
427
+ """
428
+ Updates the image box with the given choice.
429
+
430
+ Args:
431
+ choice (list): The list of image sources to be displayed in the image box.
432
+
433
+ Returns:
434
+ dict: A dictionary containing the updated value, sources, and type for the image box.
435
+ """
436
+ ret_dict = {
437
+ "value": None, # The updated value of the image box
438
+ "__type__": "update", # The type of update for the image box
439
+ }
440
+ if GRADIO_VERSION > "3":
441
+ return {
442
+ **ret_dict,
443
+ "sources": choice, # The list of image sources to be displayed
444
+ }
445
+ else:
446
+ return {
447
+ **ret_dict,
448
+ "source": choice, # The list of image sources to be displayed
449
+ }
450
+
451
+ def _on_select_force_resize(self, visible: bool = False):
452
+ return gr.update(visible=visible), gr.update(visible=visible)
453
+
454
+ def ui_reset_state(
455
+ self,
456
+ *args: Any,
457
+ ) -> Tuple[
458
+ Optional[np.ndarray],
459
+ Optional[np.ndarray],
460
+ float,
461
+ int,
462
+ float,
463
+ str,
464
+ Dict[str, Any],
465
+ Dict[str, Any],
466
+ str,
467
+ Optional[np.ndarray],
468
+ Optional[np.ndarray],
469
+ Optional[np.ndarray],
470
+ Dict[str, Any],
471
+ Dict[str, Any],
472
+ Optional[np.ndarray],
473
+ Dict[str, Any],
474
+ str,
475
+ int,
476
+ float,
477
+ int,
478
+ bool,
479
+ ]:
480
+ """
481
+ Reset the state of the UI.
482
+
483
+ Returns:
484
+ tuple: A tuple containing the initial values for the UI state.
485
+ """
486
+ key: str = list(self.matcher_zoo.keys())[
487
+ 0
488
+ ] # Get the first key from matcher_zoo
489
+ # flush_logs()
490
+ return (
491
+ None, # image0: Optional[np.ndarray]
492
+ None, # image1: Optional[np.ndarray]
493
+ self.cfg["defaults"]["match_threshold"], # matching_threshold: float
494
+ self.cfg["defaults"]["max_keypoints"], # max_keypoints: int
495
+ self.cfg["defaults"]["keypoint_threshold"], # keypoint_threshold: float
496
+ key, # matcher: str
497
+ self.ui_change_imagebox("upload"), # input image0: Dict[str, Any]
498
+ self.ui_change_imagebox("upload"), # input image1: Dict[str, Any]
499
+ "upload", # match_image_src: str
500
+ None, # keypoints: Optional[np.ndarray]
501
+ None, # raw matches: Optional[np.ndarray]
502
+ None, # ransac matches: Optional[np.ndarray]
503
+ {}, # matches result info: Dict[str, Any]
504
+ {}, # matcher config: Dict[str, Any]
505
+ None, # warped image: Optional[np.ndarray]
506
+ {}, # geometry result: Dict[str, Any]
507
+ self.cfg["defaults"]["ransac_method"], # ransac_method: str
508
+ self.cfg["defaults"][
509
+ "ransac_reproj_threshold"
510
+ ], # ransac_reproj_threshold: float
511
+ self.cfg["defaults"]["ransac_confidence"], # ransac_confidence: float
512
+ self.cfg["defaults"]["ransac_max_iter"], # ransac_max_iter: int
513
+ self.cfg["defaults"]["setting_geometry"], # geometry: str
514
+ None, # predictions
515
+ False,
516
+ )
517
+
518
+ def display_supported_algorithms(self, style="tab"):
519
+ def get_link(link, tag="Link"):
520
+ return "[{}]({})".format(tag, link) if link is not None else "None"
521
+
522
+ data = []
523
+ cfg = self.cfg["matcher_zoo"]
524
+ if style == "md":
525
+ markdown_table = "| Algo. | Conference | Code | Project | Paper |\n"
526
+ markdown_table += "| ----- | ---------- | ---- | ------- | ----- |\n"
527
+
528
+ for _, v in cfg.items():
529
+ if not v["info"].get("display", True):
530
+ continue
531
+ github_link = get_link(v["info"].get("github", ""))
532
+ project_link = get_link(v["info"].get("project", ""))
533
+ paper_link = get_link(
534
+ v["info"]["paper"],
535
+ (
536
+ Path(v["info"]["paper"]).name[-10:]
537
+ if v["info"]["paper"] is not None
538
+ else "Link"
539
+ ),
540
+ )
541
+
542
+ markdown_table += "{}|{}|{}|{}|{}\n".format(
543
+ v["info"].get("name", ""),
544
+ v["info"].get("source", ""),
545
+ github_link,
546
+ project_link,
547
+ paper_link,
548
+ )
549
+ return gr.Markdown(markdown_table)
550
+ elif style == "tab":
551
+ for k, v in cfg.items():
552
+ if not v["info"].get("display", True):
553
+ continue
554
+ data.append(
555
+ [
556
+ v["info"].get("name", ""),
557
+ v["info"].get("source", ""),
558
+ v["info"].get("github", ""),
559
+ v["info"].get("paper", ""),
560
+ v["info"].get("project", ""),
561
+ ]
562
+ )
563
+ tab = gr.Dataframe(
564
+ headers=["Algo.", "Conference", "Code", "Paper", "Project"],
565
+ datatype=["str", "str", "str", "str", "str"],
566
+ col_count=(5, "fixed"),
567
+ value=data,
568
+ # wrap=True,
569
+ # min_width = 1000,
570
+ # height=1000,
571
+ )
572
+ return tab
573
+
574
+
575
+ class AppBaseUI:
576
+ def __init__(self, cfg: Dict[str, Any] = {}):
577
+ self.cfg = OmegaConf.create(cfg)
578
+ self.inputs = edict({})
579
+ self.outputs = edict({})
580
+ self.ui = edict({})
581
+
582
+ def _init_ui(self):
583
+ raise NotImplementedError
584
+
585
+ def call(self, **kwargs):
586
+ raise NotImplementedError
587
+
588
+ def info(self):
589
+ gr.Info("SFM is under construction.")
590
+
591
+
592
+ class AppSfmUI(AppBaseUI):
593
+ def __init__(self, cfg: Dict[str, Any] = None):
594
+ super().__init__(cfg)
595
+ assert "matcher_zoo" in self.cfg
596
+ self.matcher_zoo = self.cfg["matcher_zoo"]
597
+ self.sfm_engine = SfmEngine(cfg)
598
+ self._init_ui()
599
+
600
+ def init_retrieval_dropdown(self):
601
+ algos = []
602
+ for k, v in self.cfg["retrieval_zoo"].items():
603
+ if v.get("enable", True):
604
+ algos.append(k)
605
+ return algos
606
+
607
+ def _update_options(self, option):
608
+ if option == "sparse":
609
+ return gr.Textbox("sparse", visible=True)
610
+ elif option == "dense":
611
+ return gr.Textbox("dense", visible=True)
612
+ else:
613
+ return gr.Textbox("not set", visible=True)
614
+
615
+ def _on_select_custom_params(self, value: bool = False):
616
+ return gr.update(visible=value)
617
+
618
+ def _init_ui(self):
619
+ with gr.Row():
620
+ # data setting and camera settings
621
+ with gr.Column():
622
+ self.inputs.input_images = gr.File(
623
+ label="SfM",
624
+ interactive=True,
625
+ file_count="multiple",
626
+ min_width=300,
627
+ )
628
+ # camera setting
629
+ with gr.Accordion("Camera Settings", open=True):
630
+ with gr.Column():
631
+ with gr.Row():
632
+ with gr.Column():
633
+ self.inputs.camera_model = gr.Dropdown(
634
+ choices=[
635
+ "PINHOLE",
636
+ "SIMPLE_RADIAL",
637
+ "OPENCV",
638
+ ],
639
+ value="PINHOLE",
640
+ label="Camera Model",
641
+ interactive=True,
642
+ )
643
+ with gr.Column():
644
+ gr.Checkbox(
645
+ label="Shared Params",
646
+ value=True,
647
+ interactive=True,
648
+ )
649
+ camera_custom_params_cb = gr.Checkbox(
650
+ label="Custom Params",
651
+ value=False,
652
+ interactive=True,
653
+ )
654
+ with gr.Row():
655
+ self.inputs.camera_params = gr.Textbox(
656
+ label="Camera Params",
657
+ value="0,0,0,0",
658
+ interactive=False,
659
+ visible=False,
660
+ )
661
+ camera_custom_params_cb.select(
662
+ fn=self._on_select_custom_params,
663
+ inputs=camera_custom_params_cb,
664
+ outputs=self.inputs.camera_params,
665
+ )
666
+
667
+ with gr.Accordion("Matching Settings", open=True):
668
+ # feature extraction and matching setting
669
+ with gr.Row():
670
+ # matcher setting
671
+ self.inputs.matcher_key = gr.Dropdown(
672
+ choices=self.matcher_zoo.keys(),
673
+ value="disk+lightglue",
674
+ label="Matching Model",
675
+ interactive=True,
676
+ )
677
+ with gr.Row():
678
+ with gr.Accordion("Advanced Settings", open=False):
679
+ with gr.Column():
680
+ with gr.Row():
681
+ # matching setting
682
+ self.inputs.max_keypoints = gr.Slider(
683
+ label="Max Keypoints",
684
+ minimum=100,
685
+ maximum=10000,
686
+ value=1000,
687
+ interactive=True,
688
+ )
689
+ self.inputs.keypoint_threshold = gr.Slider(
690
+ label="Keypoint Threshold",
691
+ minimum=0,
692
+ maximum=1,
693
+ value=0.01,
694
+ )
695
+ with gr.Row():
696
+ self.inputs.match_threshold = gr.Slider(
697
+ label="Match Threshold",
698
+ minimum=0.01,
699
+ maximum=12.0,
700
+ value=0.2,
701
+ )
702
+ self.inputs.ransac_threshold = gr.Slider(
703
+ label="Ransac Threshold",
704
+ minimum=0.01,
705
+ maximum=12.0,
706
+ value=4.0,
707
+ step=0.01,
708
+ interactive=True,
709
+ )
710
+
711
+ with gr.Row():
712
+ self.inputs.ransac_confidence = gr.Slider(
713
+ label="Ransac Confidence",
714
+ minimum=0.01,
715
+ maximum=1.0,
716
+ value=0.9999,
717
+ step=0.0001,
718
+ interactive=True,
719
+ )
720
+ self.inputs.ransac_max_iter = gr.Slider(
721
+ label="Ransac Max Iter",
722
+ minimum=1,
723
+ maximum=100,
724
+ value=100,
725
+ step=1,
726
+ interactive=True,
727
+ )
728
+ with gr.Accordion("Scene Graph Settings", open=True):
729
+ # mapping setting
730
+ self.inputs.scene_graph = gr.Dropdown(
731
+ choices=["all", "swin", "oneref"],
732
+ value="all",
733
+ label="Scene Graph",
734
+ interactive=True,
735
+ )
736
+
737
+ # global feature setting
738
+ self.inputs.global_feature = gr.Dropdown(
739
+ choices=self.init_retrieval_dropdown(),
740
+ value="netvlad",
741
+ label="Global features",
742
+ interactive=True,
743
+ )
744
+ self.inputs.top_k = gr.Slider(
745
+ label="Number of Images per Image to Match",
746
+ minimum=1,
747
+ maximum=100,
748
+ value=10,
749
+ step=1,
750
+ )
751
+ # button_match = gr.Button("Run Matching", variant="primary")
752
+
753
+ # mapping setting
754
+ with gr.Column():
755
+ with gr.Accordion("Mapping Settings", open=True):
756
+ with gr.Row():
757
+ with gr.Accordion("Bundle Settings", open=True):
758
+ with gr.Row():
759
+ self.inputs.mapper_refine_focal_length = gr.Checkbox(
760
+ label="Refine Focal Length",
761
+ value=False,
762
+ interactive=True,
763
+ )
764
+ self.inputs.mapper_refine_principle_points = (
765
+ gr.Checkbox(
766
+ label="Refine Principal Points",
767
+ value=False,
768
+ interactive=True,
769
+ )
770
+ )
771
+ self.inputs.mapper_refine_extra_params = gr.Checkbox(
772
+ label="Refine Extra Params",
773
+ value=False,
774
+ interactive=True,
775
+ )
776
+ with gr.Accordion("Retriangulation Settings", open=True):
777
+ gr.Textbox(
778
+ label="Retriangulation Details",
779
+ )
780
+ self.ui.button_sfm = gr.Button("Run SFM", variant="primary")
781
+ self.outputs.model_3d = gr.Model3D(
782
+ interactive=True,
783
+ )
784
+ self.outputs.output_image = gr.Image(
785
+ label="SFM Visualize",
786
+ type="numpy",
787
+ image_mode="RGB",
788
+ interactive=False,
789
+ )
790
+
791
+ def call_empty(self):
792
+ self.ui.button_sfm.click(fn=self.info, inputs=[], outputs=[])
793
+
794
+ def call(self):
795
+ self.ui.button_sfm.click(
796
+ fn=self.sfm_engine.call,
797
+ inputs=[
798
+ self.inputs.matcher_key,
799
+ self.inputs.input_images, # images
800
+ self.inputs.camera_model,
801
+ self.inputs.camera_params,
802
+ self.inputs.max_keypoints,
803
+ self.inputs.keypoint_threshold,
804
+ self.inputs.match_threshold,
805
+ self.inputs.ransac_threshold,
806
+ self.inputs.ransac_confidence,
807
+ self.inputs.ransac_max_iter,
808
+ self.inputs.scene_graph,
809
+ self.inputs.global_feature,
810
+ self.inputs.top_k,
811
+ self.inputs.mapper_refine_focal_length,
812
+ self.inputs.mapper_refine_principle_points,
813
+ self.inputs.mapper_refine_extra_params,
814
+ ],
815
+ outputs=[self.outputs.model_3d, self.outputs.output_image],
816
+ )
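
The Gradio wiring above follows a single pattern: build components, collect them into inputs/outputs lists, and bind buttons with .click(), keeping the returned event so a stop button can cancel it. Below is a minimal, self-contained sketch of that pattern (not part of this diff; the function and component names are made up), assuming only that gradio is installed:

import gradio as gr

def run_match(threshold):
    # placeholder for the real matching pipeline
    return f"matched with threshold={threshold}"

with gr.Blocks() as demo:
    threshold = gr.Slider(0.0, 1.0, value=0.1, label="Match threshold")
    result = gr.Textbox(label="Result")
    with gr.Row():
        button_reset = gr.Button("Reset")
        button_run = gr.Button("Run Match", variant="primary")
        button_stop = gr.Button("Force Stop", variant="stop")

    # keep the click event so the stop button can cancel a running job
    click_event = button_run.click(fn=run_match, inputs=threshold, outputs=result)
    button_stop.click(fn=None, inputs=None, outputs=None, cancels=[click_event])
    # reset restores default values for every component listed in outputs
    button_reset.click(fn=lambda: (0.1, ""), inputs=None, outputs=[threshold, result])

if __name__ == "__main__":
    demo.queue().launch()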
 
 
 
 
imcui/ui/modelcache.py ADDED
@@ -0,0 +1,371 @@
1
+ import hashlib
2
+ import json
3
+ import time
4
+ import threading
5
+ from collections import OrderedDict
6
+ import torch
7
+ from ..hloc import logger
8
+
9
+
10
+ class ARCSizeAwareModelCache:
11
+ def __init__(
12
+ self,
13
+ max_gpu_mem: float = 8e9,
14
+ max_cpu_mem: float = 12e9,
15
+ device_priority: list = ["cuda", "cpu"],
16
+ auto_empty_cache: bool = True,
17
+ ):
18
+ """
19
+ Initialize the model cache.
20
+
21
+ Args:
22
+ max_gpu_mem: Maximum GPU memory allowed in bytes.
23
+ max_cpu_mem: Maximum CPU memory allowed in bytes.
24
+ device_priority: List of devices to prioritize when evicting models.
25
+ auto_empty_cache: Whether to call torch.cuda.empty_cache() when out of memory.
26
+ """
27
+
28
+ self.t1 = OrderedDict()
29
+ self.t2 = OrderedDict()
30
+ self.b1 = OrderedDict()
31
+ self.b2 = OrderedDict()
32
+
33
+ self.max_gpu = max_gpu_mem
34
+ self.max_cpu = max_cpu_mem
35
+ self.current_gpu = 0
36
+ self.current_cpu = 0
37
+
38
+ self.p = 0
39
+ self.adaptive_factor = 0.5
40
+
41
+ self.device_priority = device_priority
42
+ self.lock = threading.Lock()
43
+ self.auto_empty_cache = auto_empty_cache
44
+
45
+ logger.info("ARCSizeAwareModelCache initialized.")
46
+
47
+ def _release_model(self, model_entry):
48
+ """
49
+ Release a model from memory.
50
+
51
+ Args:
52
+ model_entry: A dictionary containing the model, device and other information.
53
+
54
+ Notes:
55
+ If the device is CUDA and auto_empty_cache is True, torch.cuda.empty_cache() is called after releasing the model.
56
+ """
57
+ model = model_entry["model"]
58
+ device = model_entry["device"]
59
+
60
+ del model
61
+ if device == "cuda":
62
+ torch.cuda.synchronize()
63
+ if self.auto_empty_cache:
64
+ torch.cuda.empty_cache()
65
+
66
+ def generate_key(self, model_key, model_conf: dict) -> str:
67
+ loader_identifier = f"{model_key}"
68
+ unique_str = f"{loader_identifier}-{json.dumps(model_conf, sort_keys=True)}"
69
+ return hashlib.sha256(unique_str.encode()).hexdigest()
70
+
71
+ def _get_device(self, model_size: int) -> str:
72
+ for device in self.device_priority:
73
+ if device == "cuda" and torch.cuda.is_available():
74
+ if self.current_gpu + model_size <= self.max_gpu:
75
+ return "cuda"
76
+ elif device == "cpu":
77
+ if self.current_cpu + model_size <= self.max_cpu:
78
+ return "cpu"
79
+ return "cpu"
80
+
81
+ def _calculate_model_size(self, model):
82
+ return sum(p.numel() * p.element_size() for p in model.parameters()) + sum(
83
+ b.numel() * b.element_size() for b in model.buffers()
84
+ )
85
+
86
+ def _update_access(self, key: str, size: int, device: str):
87
+ if key in self.b1:
88
+ self.p = min(
89
+ self.p + max(1, len(self.b2) // len(self.b1)),
90
+ len(self.t1) + len(self.t2),
91
+ )
92
+ self.b1.pop(key)
93
+ self._replace(False)
94
+ elif key in self.b2:
95
+ self.p = max(self.p - max(1, len(self.b1) // len(self.b2)), 0)
96
+ self.b2.pop(key)
97
+ self._replace(True)
98
+
99
+ if key in self.t1:
100
+ self.t1.pop(key)
101
+ self.t2[key] = {
102
+ "size": size,
103
+ "device": device,
104
+ "access_count": 1,
105
+ "last_accessed": time.time(),
106
+ }
107
+
108
+ def _replace(self, in_t2: bool):
109
+ if len(self.t1) > 0 and (
110
+ (len(self.t1) > self.p) or (in_t2 and len(self.t1) == self.p)
111
+ ):
112
+ k, v = self.t1.popitem(last=False)
113
+ self.b1[k] = v
114
+ else:
115
+ k, v = self.t2.popitem(last=False)
116
+ self.b2[k] = v
117
+
118
+ def _calculate_weight(self, entry) -> float:
119
+ return entry["access_count"] / entry["size"]
120
+
121
+ def _evict_models(self, required_size: int, target_device: str) -> bool:
122
+ candidates = []
123
+ for k, v in list(self.t1.items()) + list(self.t2.items()):
124
+ if v["device"] == target_device:
125
+ candidates.append((k, v))
126
+
127
+ candidates.sort(key=lambda x: self._calculate_weight(x[1]))
128
+
129
+ freed = 0
130
+ for k, v in candidates:
131
+ self._release_model(v)
132
+ freed += v["size"]
133
+ if k in self.t1:
134
+ self.t1.pop(k)
135
+ if k in self.t2:
136
+ self.t2.pop(k)
137
+
138
+ if v["device"] == "cuda":
139
+ self.current_gpu -= v["size"]
140
+ else:
141
+ self.current_cpu -= v["size"]
142
+
143
+ if freed >= required_size:
144
+ return True
145
+
146
+ if target_device == "cuda":
147
+ return self._cross_device_evict(required_size, "cuda")
148
+ return False
149
+
150
+ def _cross_device_evict(self, required_size: int, target_device: str) -> bool:
151
+ all_entries = []
152
+ for k, v in list(self.t1.items()) + list(self.t2.items()):
153
+ all_entries.append((k, v))
154
+
155
+ all_entries.sort(
156
+ key=lambda x: self._calculate_weight(x[1])
157
+ + (0.5 if x[1]["device"] == target_device else 0)
158
+ )
159
+
160
+ freed = 0
161
+ for k, v in all_entries:
162
+ freed += v["size"]
163
+ if k in self.t1:
164
+ self.t1.pop(k)
165
+ if k in self.t2:
166
+ self.t2.pop(k)
167
+
168
+ if v["device"] == "cuda":
169
+ self.current_gpu -= v["size"]
170
+ else:
171
+ self.current_cpu -= v["size"]
172
+
173
+ if freed >= required_size:
174
+ return True
175
+ return False
176
+
177
+ def load_model(self, model_key, model_loader_func, model_conf: dict):
178
+ key = self.generate_key(model_key, model_conf)
179
+
180
+ with self.lock:
181
+ if key in self.t1 or key in self.t2:
182
+ entry = self.t1.pop(key, None) or self.t2.pop(key)
183
+ entry["access_count"] += 1
184
+ self.t2[key] = entry
185
+ return entry["model"]
186
+
187
+ raw_model = model_loader_func(model_conf)
188
+ model_size = self._calculate_model_size(raw_model)
189
+ device = self._get_device(model_size)
190
+
191
+ if device == "cuda" and self.auto_empty_cache:
192
+ torch.cuda.empty_cache()
193
+ torch.cuda.synchronize()
194
+
195
+ while True:
196
+ current_mem = self.current_gpu if device == "cuda" else self.current_cpu
197
+ max_mem = self.max_gpu if device == "cuda" else self.max_cpu
198
+
199
+ if current_mem + model_size <= max_mem:
200
+ break
201
+
202
+ if not self._evict_models(model_size, device):
203
+ if device == "cuda":
204
+ device = "cpu"
205
+ else:
206
+ raise RuntimeError("Out of memory")
207
+
208
+ try:
209
+ model = raw_model.to(device)
210
+ except RuntimeError as e:
211
+ if "CUDA out of memory" in str(e):
212
+ torch.cuda.empty_cache()
213
+ model = raw_model.to(device)
+ else:
+ # not an OOM error: re-raise instead of leaving `model` unbound
+ raise
214
+
215
+ new_entry = {
216
+ "model": model,
217
+ "size": model_size,
218
+ "device": device,
219
+ "access_count": 1,
220
+ "last_accessed": time.time(),
221
+ }
222
+
223
+ if key in self.b1 or key in self.b2:
224
+ self.t2[key] = new_entry
225
+ self._replace(True)
226
+ else:
227
+ self.t1[key] = new_entry
228
+ self._replace(False)
229
+
230
+ if device == "cuda":
231
+ self.current_gpu += model_size
232
+ else:
233
+ self.current_cpu += model_size
234
+
235
+ return model
236
+
237
+ def clear_device_cache(self, device: str):
238
+ with self.lock:
239
+ for cache in [self.t1, self.t2, self.b1, self.b2]:
240
+ for k in list(cache.keys()):
241
+ if cache[k]["device"] == device:
242
+ cache.pop(k)
243
+
244
+
245
+ class LRUModelCache:
246
+ def __init__(
247
+ self,
248
+ max_gpu_mem: float = 8e9,
249
+ max_cpu_mem: float = 12e9,
250
+ device_priority: list = ["cuda", "cpu"],
251
+ ):
252
+ self.cache = OrderedDict()
253
+ self.max_gpu = max_gpu_mem
254
+ self.max_cpu = max_cpu_mem
255
+ self.current_gpu = 0
256
+ self.current_cpu = 0
257
+ self.lock = threading.Lock()
258
+ self.device_priority = device_priority
259
+
260
+ def generate_key(self, model_key, model_conf: dict) -> str:
261
+ loader_identifier = f"{model_key}"
262
+ unique_str = f"{loader_identifier}-{json.dumps(model_conf, sort_keys=True)}"
263
+ return hashlib.sha256(unique_str.encode()).hexdigest()
264
+
265
+ def get_device(self) -> str:
266
+ for device in self.device_priority:
267
+ if device == "cuda" and torch.cuda.is_available():
268
+ if self.current_gpu < self.max_gpu:
269
+ return device
270
+ elif device == "cpu":
271
+ if self.current_cpu < self.max_cpu:
272
+ return device
273
+ return "cpu"
274
+
275
+ def _calculate_model_size(self, model):
276
+ param_size = sum(p.numel() * p.element_size() for p in model.parameters())
277
+ buffer_size = sum(b.numel() * b.element_size() for b in model.buffers())
278
+ return param_size + buffer_size
279
+
280
+ def load_model(self, model_key, model_loader_func, model_conf: dict):
281
+ key = self.generate_key(model_key, model_conf)
282
+
283
+ with self.lock:
284
+ if key in self.cache:
285
+ self.cache.move_to_end(key) # update LRU
286
+ return self.cache[key]["model"]
287
+
288
+ device = self.get_device()
289
+ if device == "cuda":
290
+ torch.cuda.empty_cache()
291
+
292
+ try:
293
+ raw_model = model_loader_func(model_conf)
294
+ except Exception as e:
295
+ raise RuntimeError(f"Model loading failed: {str(e)}")
296
+
297
+ try:
298
+ model = raw_model.to(device)
299
+ except RuntimeError as e:
300
+ if "CUDA out of memory" in str(e):
301
+ return self._handle_oom(model_key, model_loader_func, model_conf)
302
+ raise
303
+
304
+ model_size = self._calculate_model_size(model)
305
+
306
+ while (
307
+ device == "cuda" and (self.current_gpu + model_size > self.max_gpu)
308
+ ) or (device == "cpu" and (self.current_cpu + model_size > self.max_cpu)):
309
+ if not self._free_space(model_size, device):
310
+ raise RuntimeError("Insufficient memory even after cache cleanup")
311
+
312
+ if device == "cuda":
313
+ self.current_gpu += model_size
314
+ else:
315
+ self.current_cpu += model_size
316
+
317
+ self.cache[key] = {
318
+ "model": model,
319
+ "size": model_size,
320
+ "device": device,
321
+ "timestamp": time.time(),
322
+ }
323
+
324
+ return model
325
+
326
+ def _free_space(self, required_size: int, device: str) -> bool:
327
+ for key in list(self.cache.keys()):
328
+ if (device == "cuda" and self.cache[key]["device"] == "cuda") or (
329
+ device == "cpu" and self.cache[key]["device"] == "cpu"
330
+ ):
331
+ self.current_gpu -= (
332
+ self.cache[key]["size"]
333
+ if self.cache[key]["device"] == "cuda"
334
+ else 0
335
+ )
336
+ self.current_cpu -= (
337
+ self.cache[key]["size"] if self.cache[key]["device"] == "cpu" else 0
338
+ )
339
+ del self.cache[key]
340
+
341
+ if (
342
+ device == "cuda"
343
+ and self.current_gpu + required_size <= self.max_gpu
344
+ ) or (
345
+ device == "cpu" and self.current_cpu + required_size <= self.max_cpu
346
+ ):
347
+ return True
348
+ return False
349
+
350
+ def _handle_oom(self, model_key, model_loader_func, model_conf: dict):
351
+ with self.lock:
352
+ self.clear_device_cache("cuda")
353
+ torch.cuda.empty_cache()
354
+
355
+ try:
356
+ return self.load_model(model_key, model_loader_func, model_conf)
357
+ except RuntimeError:
358
+ original_priority = self.device_priority
359
+ self.device_priority = ["cpu"]
360
+ try:
361
+ return self.load_model(model_key, model_loader_func, model_conf)
362
+ finally:
363
+ self.device_priority = original_priority
364
+
365
+ def clear_device_cache(self, device: str):
366
+ with self.lock:
367
+ keys_to_remove = [k for k, v in self.cache.items() if v["device"] == device]
368
+ for k in keys_to_remove:
369
+ self.current_gpu -= self.cache[k]["size"] if device == "cuda" else 0
370
+ self.current_cpu -= self.cache[k]["size"] if device == "cpu" else 0
371
+ del self.cache[k]
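
Both caches added here expose the same entry point, load_model(model_key, model_loader_func, model_conf): the key and config are hashed into a cache key, the loader is only invoked on a miss, and the returned module is moved to a device within the configured memory budgets. A hypothetical usage sketch (not part of this diff; the loader callable and config below are invented, and only the simpler LRUModelCache is exercised):

import torch

from imcui.ui.modelcache import LRUModelCache

def build_tiny_model(conf: dict) -> torch.nn.Module:
    # stand-in loader; a real loader would build a feature extractor or matcher
    return torch.nn.Linear(conf["in_features"], conf["out_features"])

cache = LRUModelCache(max_gpu_mem=4e9, max_cpu_mem=8e9)
conf = {"in_features": 64, "out_features": 32}

model_a = cache.load_model("tiny_linear", build_tiny_model, conf)
model_b = cache.load_model("tiny_linear", build_tiny_model, conf)  # second call hits the cache
assert model_a is model_b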
imcui/ui/sfm.py CHANGED
@@ -1,164 +1,164 @@
1
- import shutil
2
- import tempfile
3
- from pathlib import Path
4
- from typing import Any, Dict, List
5
-
6
-
7
- from ..hloc import (
8
- extract_features,
9
- logger,
10
- match_features,
11
- pairs_from_retrieval,
12
- reconstruction,
13
- visualization,
14
- )
15
-
16
- try:
17
- import pycolmap
18
- except ImportError:
19
- logger.warning("pycolmap not installed, some features may not work")
20
-
21
- from .viz import fig2im
22
-
23
-
24
- class SfmEngine:
25
- def __init__(self, cfg: Dict[str, Any] = None):
26
- self.cfg = cfg
27
- if "outputs" in cfg and Path(cfg["outputs"]):
28
- outputs = Path(cfg["outputs"])
29
- outputs.mkdir(parents=True, exist_ok=True)
30
- else:
31
- outputs = tempfile.mkdtemp()
32
- self.outputs = Path(outputs)
33
-
34
- def call(
35
- self,
36
- key: str,
37
- images: Path,
38
- camera_model: str,
39
- camera_params: List[float],
40
- max_keypoints: int,
41
- keypoint_threshold: float,
42
- match_threshold: float,
43
- ransac_threshold: int,
44
- ransac_confidence: float,
45
- ransac_max_iter: int,
46
- scene_graph: bool,
47
- global_feature: str,
48
- top_k: int = 10,
49
- mapper_refine_focal_length: bool = False,
50
- mapper_refine_principle_points: bool = False,
51
- mapper_refine_extra_params: bool = False,
52
- ):
53
- """
54
- Call a list of functions to perform feature extraction, matching, and reconstruction.
55
-
56
- Args:
57
- key (str): The key to retrieve the matcher and feature models.
58
- images (Path): The directory containing the images.
59
- outputs (Path): The directory to store the outputs.
60
- camera_model (str): The camera model.
61
- camera_params (List[float]): The camera parameters.
62
- max_keypoints (int): The maximum number of features.
63
- match_threshold (float): The match threshold.
64
- ransac_threshold (int): The RANSAC threshold.
65
- ransac_confidence (float): The RANSAC confidence.
66
- ransac_max_iter (int): The maximum number of RANSAC iterations.
67
- scene_graph (bool): Whether to compute the scene graph.
68
- global_feature (str): Whether to compute the global feature.
69
- top_k (int): The number of image-pair to use.
70
- mapper_refine_focal_length (bool): Whether to refine the focal length.
71
- mapper_refine_principle_points (bool): Whether to refine the principle points.
72
- mapper_refine_extra_params (bool): Whether to refine the extra parameters.
73
-
74
- Returns:
75
- Path: The directory containing the SfM results.
76
- """
77
- if len(images) == 0:
78
- logger.error(f"{images} does not exist.")
79
-
80
- temp_images = Path(tempfile.mkdtemp())
81
- # copy images
82
- logger.info(f"Copying images to {temp_images}.")
83
- for image in images:
84
- shutil.copy(image, temp_images)
85
-
86
- matcher_zoo = self.cfg["matcher_zoo"]
87
- model = matcher_zoo[key]
88
- match_conf = model["matcher"]
89
- match_conf["model"]["max_keypoints"] = max_keypoints
90
- match_conf["model"]["match_threshold"] = match_threshold
91
-
92
- feature_conf = model["feature"]
93
- feature_conf["model"]["max_keypoints"] = max_keypoints
94
- feature_conf["model"]["keypoint_threshold"] = keypoint_threshold
95
-
96
- # retrieval
97
- retrieval_name = self.cfg.get("retrieval_name", "netvlad")
98
- retrieval_conf = extract_features.confs[retrieval_name]
99
-
100
- mapper_options = {
101
- "ba_refine_extra_params": mapper_refine_extra_params,
102
- "ba_refine_focal_length": mapper_refine_focal_length,
103
- "ba_refine_principal_point": mapper_refine_principle_points,
104
- "ba_local_max_num_iterations": 40,
105
- "ba_local_max_refinements": 3,
106
- "ba_global_max_num_iterations": 100,
107
- # below 3 options are for individual/video data, for internet photos, they should be left
108
- # default
109
- "min_focal_length_ratio": 0.1,
110
- "max_focal_length_ratio": 10,
111
- "max_extra_param": 1e15,
112
- }
113
-
114
- sfm_dir = self.outputs / "sfm_{}".format(key)
115
- sfm_pairs = self.outputs / "pairs-sfm.txt"
116
- sfm_dir.mkdir(exist_ok=True, parents=True)
117
-
118
- # extract features
119
- retrieval_path = extract_features.main(
120
- retrieval_conf, temp_images, self.outputs
121
- )
122
- pairs_from_retrieval.main(retrieval_path, sfm_pairs, num_matched=top_k)
123
-
124
- feature_path = extract_features.main(feature_conf, temp_images, self.outputs)
125
- # match features
126
- match_path = match_features.main(
127
- match_conf, sfm_pairs, feature_conf["output"], self.outputs
128
- )
129
- # reconstruction
130
- already_sfm = False
131
- if sfm_dir.exists():
132
- try:
133
- model = pycolmap.Reconstruction(str(sfm_dir))
134
- already_sfm = True
135
- except ValueError:
136
- logger.info(f"sfm_dir exists but contains no valid reconstruction: {sfm_dir}")
137
- if not already_sfm:
138
- model = reconstruction.main(
139
- sfm_dir,
140
- temp_images,
141
- sfm_pairs,
142
- feature_path,
143
- match_path,
144
- mapper_options=mapper_options,
145
- )
146
-
147
- vertices = []
148
- for point3D_id, point3D in model.points3D.items():
149
- vertices.append([point3D.xyz, point3D.color])
150
-
151
- model_3d = sfm_dir / "points3D.obj"
152
- with open(model_3d, "w") as f:
153
- for p, c in vertices:
154
- # Write vertex position
155
- f.write("v {} {} {}\n".format(p[0], p[1], p[2]))
156
- # Write vertex normal (color)
157
- f.write(
158
- "vn {} {} {}\n".format(c[0] / 255.0, c[1] / 255.0, c[2] / 255.0)
159
- )
160
- viz_2d = visualization.visualize_sfm_2d(
161
- model, temp_images, color_by="visibility", n=2, dpi=300
162
- )
163
-
164
- return model_3d, fig2im(viz_2d) / 255.0
 
1
+ import shutil
2
+ import tempfile
3
+ from pathlib import Path
4
+ from typing import Any, Dict, List
5
+
6
+
7
+ from ..hloc import (
8
+ extract_features,
9
+ logger,
10
+ match_features,
11
+ pairs_from_retrieval,
12
+ reconstruction,
13
+ visualization,
14
+ )
15
+
16
+ try:
17
+ import pycolmap
18
+ except ImportError:
19
+ logger.warning("pycolmap not installed, some features may not work")
20
+
21
+ from .viz import fig2im
22
+
23
+
24
+ class SfmEngine:
25
+ def __init__(self, cfg: Dict[str, Any] = None):
26
+ self.cfg = cfg
27
+ if "outputs" in cfg and Path(cfg["outputs"]):
28
+ outputs = Path(cfg["outputs"])
29
+ outputs.mkdir(parents=True, exist_ok=True)
30
+ else:
31
+ outputs = tempfile.mkdtemp()
32
+ self.outputs = Path(outputs)
33
+
34
+ def call(
35
+ self,
36
+ key: str,
37
+ images: Path,
38
+ camera_model: str,
39
+ camera_params: List[float],
40
+ max_keypoints: int,
41
+ keypoint_threshold: float,
42
+ match_threshold: float,
43
+ ransac_threshold: int,
44
+ ransac_confidence: float,
45
+ ransac_max_iter: int,
46
+ scene_graph: bool,
47
+ global_feature: str,
48
+ top_k: int = 10,
49
+ mapper_refine_focal_length: bool = False,
50
+ mapper_refine_principle_points: bool = False,
51
+ mapper_refine_extra_params: bool = False,
52
+ ):
53
+ """
54
+ Call a list of functions to perform feature extraction, matching, and reconstruction.
55
+
56
+ Args:
57
+ key (str): The key to retrieve the matcher and feature models.
58
+ images (Path): The directory containing the images.
59
+ outputs (Path): The directory to store the outputs.
60
+ camera_model (str): The camera model.
61
+ camera_params (List[float]): The camera parameters.
62
+ max_keypoints (int): The maximum number of features.
63
+ match_threshold (float): The match threshold.
64
+ ransac_threshold (int): The RANSAC threshold.
65
+ ransac_confidence (float): The RANSAC confidence.
66
+ ransac_max_iter (int): The maximum number of RANSAC iterations.
67
+ scene_graph (bool): Whether to compute the scene graph.
68
+ global_feature (str): Whether to compute the global feature.
69
+ top_k (int): The number of image-pair to use.
70
+ mapper_refine_focal_length (bool): Whether to refine the focal length.
71
+ mapper_refine_principle_points (bool): Whether to refine the principle points.
72
+ mapper_refine_extra_params (bool): Whether to refine the extra parameters.
73
+
74
+ Returns:
75
+ Path: The directory containing the SfM results.
76
+ """
77
+ if len(images) == 0:
78
+ logger.error(f"{images} does not exist.")
79
+
80
+ temp_images = Path(tempfile.mkdtemp())
81
+ # copy images
82
+ logger.info(f"Copying images to {temp_images}.")
83
+ for image in images:
84
+ shutil.copy(image, temp_images)
85
+
86
+ matcher_zoo = self.cfg["matcher_zoo"]
87
+ model = matcher_zoo[key]
88
+ match_conf = model["matcher"]
89
+ match_conf["model"]["max_keypoints"] = max_keypoints
90
+ match_conf["model"]["match_threshold"] = match_threshold
91
+
92
+ feature_conf = model["feature"]
93
+ feature_conf["model"]["max_keypoints"] = max_keypoints
94
+ feature_conf["model"]["keypoint_threshold"] = keypoint_threshold
95
+
96
+ # retrieval
97
+ retrieval_name = self.cfg.get("retrieval_name", "netvlad")
98
+ retrieval_conf = extract_features.confs[retrieval_name]
99
+
100
+ mapper_options = {
101
+ "ba_refine_extra_params": mapper_refine_extra_params,
102
+ "ba_refine_focal_length": mapper_refine_focal_length,
103
+ "ba_refine_principal_point": mapper_refine_principle_points,
104
+ "ba_local_max_num_iterations": 40,
105
+ "ba_local_max_refinements": 3,
106
+ "ba_global_max_num_iterations": 100,
107
+ # below 3 options are for individual/video data, for internet photos, they should be left
108
+ # default
109
+ "min_focal_length_ratio": 0.1,
110
+ "max_focal_length_ratio": 10,
111
+ "max_extra_param": 1e15,
112
+ }
113
+
114
+ sfm_dir = self.outputs / "sfm_{}".format(key)
115
+ sfm_pairs = self.outputs / "pairs-sfm.txt"
116
+ sfm_dir.mkdir(exist_ok=True, parents=True)
117
+
118
+ # extract features
119
+ retrieval_path = extract_features.main(
120
+ retrieval_conf, temp_images, self.outputs
121
+ )
122
+ pairs_from_retrieval.main(retrieval_path, sfm_pairs, num_matched=top_k)
123
+
124
+ feature_path = extract_features.main(feature_conf, temp_images, self.outputs)
125
+ # match features
126
+ match_path = match_features.main(
127
+ match_conf, sfm_pairs, feature_conf["output"], self.outputs
128
+ )
129
+ # reconstruction
130
+ already_sfm = False
131
+ if sfm_dir.exists():
132
+ try:
133
+ model = pycolmap.Reconstruction(str(sfm_dir))
134
+ already_sfm = True
135
+ except ValueError:
136
+ logger.info(f"sfm_dir exists but contains no valid reconstruction: {sfm_dir}")
137
+ if not already_sfm:
138
+ model = reconstruction.main(
139
+ sfm_dir,
140
+ temp_images,
141
+ sfm_pairs,
142
+ feature_path,
143
+ match_path,
144
+ mapper_options=mapper_options,
145
+ )
146
+
147
+ vertices = []
148
+ for point3D_id, point3D in model.points3D.items():
149
+ vertices.append([point3D.xyz, point3D.color])
150
+
151
+ model_3d = sfm_dir / "points3D.obj"
152
+ with open(model_3d, "w") as f:
153
+ for p, c in vertices:
154
+ # Write vertex position
155
+ f.write("v {} {} {}\n".format(p[0], p[1], p[2]))
156
+ # Write vertex normal (color)
157
+ f.write(
158
+ "vn {} {} {}\n".format(c[0] / 255.0, c[1] / 255.0, c[2] / 255.0)
159
+ )
160
+ viz_2d = visualization.visualize_sfm_2d(
161
+ model, temp_images, color_by="visibility", n=2, dpi=300
162
+ )
163
+
164
+ return model_3d, fig2im(viz_2d) / 255.0
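
The export at the end of SfmEngine.call writes the sparse point cloud as a Wavefront OBJ, with positions in "v" records and the per-point RGB colour (scaled to [0, 1]) in "vn" records so the file can be displayed by the gr.Model3D component. A standalone sketch of that export with made-up points (not part of this diff):

import numpy as np

points = np.random.rand(5, 3)                # xyz positions
colors = np.random.randint(0, 256, (5, 3))   # uint8 RGB per point

with open("points3D.obj", "w") as f:
    for p, c in zip(points, colors):
        f.write("v {} {} {}\n".format(p[0], p[1], p[2]))
        f.write("vn {} {} {}\n".format(c[0] / 255.0, c[1] / 255.0, c[2] / 255.0))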
imcui/ui/utils.py CHANGED
@@ -1,1164 +1,1108 @@
1
- import os
2
- import pickle
3
- import random
4
- import shutil
5
- import time
6
- import warnings
7
- from itertools import combinations
8
- from pathlib import Path
9
- from typing import Any, Callable, Dict, List, Optional, Tuple, Union
10
- from datasets import load_dataset
11
-
12
- import cv2
13
- import gradio as gr
14
- import matplotlib.pyplot as plt
15
- import numpy as np
16
- import poselib
17
- import psutil
18
- from PIL import Image
19
-
20
- from ..hloc import (
21
- DEVICE,
22
- extract_features,
23
- extractors,
24
- logger,
25
- match_dense,
26
- match_features,
27
- matchers,
28
- DATASETS_REPO_ID,
29
- )
30
- from ..hloc.utils.base_model import dynamic_load
31
- from .viz import display_keypoints, display_matches, fig2im, plot_images
32
-
33
- warnings.simplefilter("ignore")
34
-
35
- ROOT = Path(__file__).parents[1]
36
- # some default values
37
- DEFAULT_SETTING_THRESHOLD = 0.1
38
- DEFAULT_SETTING_MAX_FEATURES = 2000
39
- DEFAULT_DEFAULT_KEYPOINT_THRESHOLD = 0.01
40
- DEFAULT_ENABLE_RANSAC = True
41
- DEFAULT_RANSAC_METHOD = "CV2_USAC_MAGSAC"
42
- DEFAULT_RANSAC_REPROJ_THRESHOLD = 8
43
- DEFAULT_RANSAC_CONFIDENCE = 0.9999
44
- DEFAULT_RANSAC_MAX_ITER = 10000
45
- DEFAULT_MIN_NUM_MATCHES = 4
46
- DEFAULT_MATCHING_THRESHOLD = 0.2
47
- DEFAULT_SETTING_GEOMETRY = "Homography"
48
- GRADIO_VERSION = gr.__version__.split(".")[0]
49
- MATCHER_ZOO = None
50
-
51
-
52
- class ModelCache:
53
- def __init__(self, max_memory_size: int = 8):
54
- self.max_memory_size = max_memory_size
55
- self.current_memory_size = 0
56
- self.model_dict = {}
57
- self.model_timestamps = []
58
-
59
- def cache_model(self, model_key, model_loader_func, model_conf):
60
- if model_key in self.model_dict:
61
- self.model_timestamps.remove(model_key)
62
- self.model_timestamps.append(model_key)
63
- logger.info(f"Load cached {model_key}")
64
- return self.model_dict[model_key]
65
-
66
- model = self._load_model_from_disk(model_loader_func, model_conf)
67
- while self._calculate_model_memory() > self.max_memory_size:
68
- if len(self.model_timestamps) == 0:
69
- logger.warn(
70
- "RAM: {}GB, MAX RAM: {}GB".format(
71
- self._calculate_model_memory(), self.max_memory_size
72
- )
73
- )
74
- break
75
- oldest_model_key = self.model_timestamps.pop(0)
76
- self.current_memory_size = self._calculate_model_memory()
77
- logger.info(f"Del cached {oldest_model_key}")
78
- del self.model_dict[oldest_model_key]
79
-
80
- self.model_dict[model_key] = model
81
- self.model_timestamps.append(model_key)
82
-
83
- self.print_memory_usage()
84
- logger.info(f"Total cached {list(self.model_dict.keys())}")
85
-
86
- return model
87
-
88
- def _load_model_from_disk(self, model_loader_func, model_conf):
89
- return model_loader_func(model_conf)
90
-
91
- def _calculate_model_memory(self, verbose=False):
92
- host_colocation = int(os.environ.get("HOST_COLOCATION", "1"))
93
- vm = psutil.virtual_memory()
94
- du = shutil.disk_usage(".")
95
- if verbose:
96
- logger.info(
97
- f"RAM: {vm.used / 1e9:.1f}/{vm.total / host_colocation / 1e9:.1f}GB"
98
- )
99
- logger.info(
100
- f"DISK: {du.used / 1e9:.1f}/{du.total / host_colocation / 1e9:.1f}GB"
101
- )
102
- return vm.used / 1e9
103
-
104
- def print_memory_usage(self):
105
- self._calculate_model_memory(verbose=True)
106
-
107
-
108
- model_cache = ModelCache()
109
-
110
-
111
- def load_config(config_name: str) -> Dict[str, Any]:
112
- """
113
- Load a YAML configuration file.
114
-
115
- Args:
116
- config_name: The path to the YAML configuration file.
117
-
118
- Returns:
119
- The configuration dictionary, with string keys and arbitrary values.
120
- """
121
- import yaml
122
-
123
- with open(config_name, "r") as stream:
124
- try:
125
- config: Dict[str, Any] = yaml.safe_load(stream)
126
- except yaml.YAMLError as exc:
127
- logger.error(exc)
128
- return config
129
-
130
-
131
- def get_matcher_zoo(
132
- matcher_zoo: Dict[str, Dict[str, Union[str, bool]]],
133
- ) -> Dict[str, Dict[str, Union[Callable, bool]]]:
134
- """
135
- Restore matcher configurations from a dictionary.
136
-
137
- Args:
138
- matcher_zoo: A dictionary with the matcher configurations,
139
- where the configuration is a dictionary as loaded from a YAML file.
140
-
141
- Returns:
142
- A dictionary with the matcher configurations, where the configuration is
143
- a function or a function instead of a string.
144
- """
145
- matcher_zoo_restored = {}
146
- for k, v in matcher_zoo.items():
147
- matcher_zoo_restored[k] = parse_match_config(v)
148
- return matcher_zoo_restored
149
-
150
-
151
- def parse_match_config(conf):
152
- if conf["dense"]:
153
- return {
154
- "matcher": match_dense.confs.get(conf["matcher"]),
155
- "dense": True,
156
- "info": conf.get("info", {}),
157
- }
158
- else:
159
- return {
160
- "feature": extract_features.confs.get(conf["feature"]),
161
- "matcher": match_features.confs.get(conf["matcher"]),
162
- "dense": False,
163
- "info": conf.get("info", {}),
164
- }
165
-
166
-
167
- def get_model(match_conf: Dict[str, Any]):
168
- """
169
- Load a matcher model from the provided configuration.
170
-
171
- Args:
172
- match_conf: A dictionary containing the model configuration.
173
-
174
- Returns:
175
- A matcher model instance.
176
- """
177
- Model = dynamic_load(matchers, match_conf["model"]["name"])
178
- model = Model(match_conf["model"]).eval().to(DEVICE)
179
- return model
180
-
181
-
182
- def get_feature_model(conf: Dict[str, Dict[str, Any]]):
183
- """
184
- Load a feature extraction model from the provided configuration.
185
-
186
- Args:
187
- conf: A dictionary containing the model configuration.
188
-
189
- Returns:
190
- A feature extraction model instance.
191
- """
192
- Model = dynamic_load(extractors, conf["model"]["name"])
193
- model = Model(conf["model"]).eval().to(DEVICE)
194
- return model
195
-
196
-
197
- def download_example_images(repo_id, output_dir):
198
- logger.info(f"Download example dataset from huggingface: {repo_id}")
199
- dataset = load_dataset(repo_id)
200
- Path(output_dir).mkdir(parents=True, exist_ok=True)
201
- for example in dataset["train"]: # Assuming the dataset is in the "train" split
202
- file_path = example["path"]
203
- image = example["image"] # Access the PIL.Image object directly
204
- full_path = os.path.join(output_dir, file_path)
205
- Path(os.path.dirname(full_path)).mkdir(parents=True, exist_ok=True)
206
- image.save(full_path)
207
- logger.info(f"Images saved to {output_dir} successfully.")
208
- return Path(output_dir)
209
-
210
-
211
- def gen_examples(data_root: Path):
212
- random.seed(1)
213
- example_matchers = [
214
- "disk+lightglue",
215
- "xfeat(sparse)",
216
- "dedode",
217
- "loftr",
218
- "disk",
219
- "RoMa",
220
- "d2net",
221
- "aspanformer",
222
- "topicfm",
223
- "superpoint+superglue",
224
- "superpoint+lightglue",
225
- "superpoint+mnn",
226
- "disk",
227
- ]
228
- data_root = Path(data_root)
229
- if not Path(data_root).exists():
230
- try:
231
- download_example_images(DATASETS_REPO_ID, data_root)
232
- except Exception as e:
233
- logger.error(f"download_example_images error : {e}")
234
- data_root = ROOT / "datasets"
235
- if not Path(data_root / "sacre_coeur/mapping").exists():
236
- download_example_images(DATASETS_REPO_ID, data_root)
237
-
238
- def distribute_elements(A, B):
239
- new_B = np.array(B, copy=True).flatten()
240
- np.random.shuffle(new_B)
241
- new_B = np.resize(new_B, len(A))
242
- np.random.shuffle(new_B)
243
- return new_B.tolist()
244
-
245
- # normal examples
246
- def gen_images_pairs(count: int = 5):
247
- path = str(data_root / "sacre_coeur/mapping")
248
- imgs_list = [
249
- os.path.join(path, file)
250
- for file in os.listdir(path)
251
- if file.lower().endswith((".jpg", ".jpeg", ".png"))
252
- ]
253
- pairs = list(combinations(imgs_list, 2))
254
- if len(pairs) < count:
255
- count = len(pairs)
256
- selected = random.sample(range(len(pairs)), count)
257
- return [pairs[i] for i in selected]
258
-
259
- # rotated examples
260
- def gen_rot_image_pairs(count: int = 5):
261
- path = data_root / "sacre_coeur/mapping"
262
- path_rot = data_root / "sacre_coeur/mapping_rot"
263
- rot_list = [45, 180, 90, 225, 270]
264
- pairs = []
265
- for file in os.listdir(path):
266
- if file.lower().endswith((".jpg", ".jpeg", ".png")):
267
- for rot in rot_list:
268
- file_rot = "{}_rot{}.jpg".format(Path(file).stem, rot)
269
- if (path_rot / file_rot).exists():
270
- pairs.append(
271
- [
272
- path / file,
273
- path_rot / file_rot,
274
- ]
275
- )
276
- if len(pairs) < count:
277
- count = len(pairs)
278
- selected = random.sample(range(len(pairs)), count)
279
- return [pairs[i] for i in selected]
280
-
281
- def gen_scale_image_pairs(count: int = 5):
282
- path = data_root / "sacre_coeur/mapping"
283
- path_scale = data_root / "sacre_coeur/mapping_scale"
284
- scale_list = [0.3, 0.5]
285
- pairs = []
286
- for file in os.listdir(path):
287
- if file.lower().endswith((".jpg", ".jpeg", ".png")):
288
- for scale in scale_list:
289
- file_scale = "{}_scale{}.jpg".format(Path(file).stem, scale)
290
- if (path_scale / file_scale).exists():
291
- pairs.append(
292
- [
293
- path / file,
294
- path_scale / file_scale,
295
- ]
296
- )
297
- if len(pairs) < count:
298
- count = len(pairs)
299
- selected = random.sample(range(len(pairs)), count)
300
- return [pairs[i] for i in selected]
301
-
302
- # extramely hard examples
303
- def gen_image_pairs_wxbs(count: int = None):
304
- prefix = "wxbs_benchmark/.WxBS/v1.1"
305
- wxbs_path = data_root / prefix
306
- pairs = []
307
- for catg in os.listdir(wxbs_path):
308
- catg_path = wxbs_path / catg
309
- if not catg_path.is_dir():
310
- continue
311
- for scene in os.listdir(catg_path):
312
- scene_path = catg_path / scene
313
- if not scene_path.is_dir():
314
- continue
315
- img1_path = scene_path / "01.png"
316
- img2_path = scene_path / "02.png"
317
- if img1_path.exists() and img2_path.exists():
318
- pairs.append([str(img1_path), str(img2_path)])
319
- return pairs
320
-
321
- # image pair path
322
- pairs = gen_images_pairs()
323
- pairs += gen_rot_image_pairs()
324
- pairs += gen_scale_image_pairs()
325
- pairs += gen_image_pairs_wxbs()
326
-
327
- match_setting_threshold = DEFAULT_SETTING_THRESHOLD
328
- match_setting_max_features = DEFAULT_SETTING_MAX_FEATURES
329
- detect_keypoints_threshold = DEFAULT_DEFAULT_KEYPOINT_THRESHOLD
330
- ransac_method = DEFAULT_RANSAC_METHOD
331
- ransac_reproj_threshold = DEFAULT_RANSAC_REPROJ_THRESHOLD
332
- ransac_confidence = DEFAULT_RANSAC_CONFIDENCE
333
- ransac_max_iter = DEFAULT_RANSAC_MAX_ITER
334
- input_lists = []
335
- dist_examples = distribute_elements(pairs, example_matchers)
336
- for pair, mt in zip(pairs, dist_examples):
337
- input_lists.append(
338
- [
339
- pair[0],
340
- pair[1],
341
- match_setting_threshold,
342
- match_setting_max_features,
343
- detect_keypoints_threshold,
344
- mt,
345
- # enable_ransac,
346
- ransac_method,
347
- ransac_reproj_threshold,
348
- ransac_confidence,
349
- ransac_max_iter,
350
- ]
351
- )
352
- return input_lists
353
-
354
-
355
- def set_null_pred(feature_type: str, pred: dict):
356
- if feature_type == "KEYPOINT":
357
- pred["mmkeypoints0_orig"] = np.array([])
358
- pred["mmkeypoints1_orig"] = np.array([])
359
- pred["mmconf"] = np.array([])
360
- elif feature_type == "LINE":
361
- pred["mline_keypoints0_orig"] = np.array([])
362
- pred["mline_keypoints1_orig"] = np.array([])
363
- pred["H"] = None
364
- pred["geom_info"] = {}
365
- return pred
366
-
367
-
368
- def _filter_matches_opencv(
369
- kp0: np.ndarray,
370
- kp1: np.ndarray,
371
- method: int = cv2.RANSAC,
372
- reproj_threshold: float = 3.0,
373
- confidence: float = 0.99,
374
- max_iter: int = 2000,
375
- geometry_type: str = "Homography",
376
- ) -> Tuple[np.ndarray, np.ndarray]:
377
- """
378
- Filters matches between two sets of keypoints using OpenCV's findHomography.
379
-
380
- Args:
381
- kp0 (np.ndarray): Array of keypoints from the first image.
382
- kp1 (np.ndarray): Array of keypoints from the second image.
383
- method (int, optional): RANSAC method. Defaults to "cv2.RANSAC".
384
- reproj_threshold (float, optional): RANSAC reprojection threshold. Defaults to 3.0.
385
- confidence (float, optional): RANSAC confidence. Defaults to 0.99.
386
- max_iter (int, optional): RANSAC maximum iterations. Defaults to 2000.
387
- geometry_type (str, optional): Type of geometry. Defaults to "Homography".
388
-
389
- Returns:
390
- Tuple[np.ndarray, np.ndarray]: Homography matrix and mask.
391
- """
392
- if geometry_type == "Homography":
393
- try:
394
- M, mask = cv2.findHomography(
395
- kp0,
396
- kp1,
397
- method=method,
398
- ransacReprojThreshold=reproj_threshold,
399
- confidence=confidence,
400
- maxIters=max_iter,
401
- )
402
- except cv2.error:
403
- logger.error("compute findHomography error, len(kp0): {}".format(len(kp0)))
404
- return None, None
405
- elif geometry_type == "Fundamental":
406
- try:
407
- M, mask = cv2.findFundamentalMat(
408
- kp0,
409
- kp1,
410
- method=method,
411
- ransacReprojThreshold=reproj_threshold,
412
- confidence=confidence,
413
- maxIters=max_iter,
414
- )
415
- except cv2.error:
416
- logger.error(
417
- "compute findFundamentalMat error, len(kp0): {}".format(len(kp0))
418
- )
419
- return None, None
420
- mask = np.array(mask.ravel().astype("bool"), dtype="bool")
421
- return M, mask
422
-
423
-
424
- def _filter_matches_poselib(
425
- kp0: np.ndarray,
426
- kp1: np.ndarray,
427
- method: int = None, # not used
428
- reproj_threshold: float = 3,
429
- confidence: float = 0.99,
430
- max_iter: int = 2000,
431
- geometry_type: str = "Homography",
432
- ) -> dict:
433
- """
434
- Filters matches between two sets of keypoints using the poselib library.
435
-
436
- Args:
437
- kp0 (np.ndarray): Array of keypoints from the first image.
438
- kp1 (np.ndarray): Array of keypoints from the second image.
439
- method (str, optional): RANSAC method. Defaults to "RANSAC".
440
- reproj_threshold (float, optional): RANSAC reprojection threshold. Defaults to 3.
441
- confidence (float, optional): RANSAC confidence. Defaults to 0.99.
442
- max_iter (int, optional): RANSAC maximum iterations. Defaults to 2000.
443
- geometry_type (str, optional): Type of geometry. Defaults to "Homography".
444
-
445
- Returns:
446
- dict: Information about the homography estimation.
447
- """
448
- ransac_options = {
449
- "max_iterations": max_iter,
450
- # "min_iterations": min_iter,
451
- "success_prob": confidence,
452
- "max_reproj_error": reproj_threshold,
453
- # "progressive_sampling": args.sampler.lower() == 'prosac'
454
- }
455
-
456
- if geometry_type == "Homography":
457
- M, info = poselib.estimate_homography(kp0, kp1, ransac_options)
458
- elif geometry_type == "Fundamental":
459
- M, info = poselib.estimate_fundamental(kp0, kp1, ransac_options)
460
- else:
461
- raise NotImplementedError
462
-
463
- return M, np.array(info["inliers"])
464
-
465
-
466
- def proc_ransac_matches(
467
- mkpts0: np.ndarray,
468
- mkpts1: np.ndarray,
469
- ransac_method: str = DEFAULT_RANSAC_METHOD,
470
- ransac_reproj_threshold: float = 3.0,
471
- ransac_confidence: float = 0.99,
472
- ransac_max_iter: int = 2000,
473
- geometry_type: str = "Homography",
474
- ):
475
- if ransac_method.startswith("CV2"):
476
- logger.info(f"ransac_method: {ransac_method}, geometry_type: {geometry_type}")
477
- return _filter_matches_opencv(
478
- mkpts0,
479
- mkpts1,
480
- ransac_zoo[ransac_method],
481
- ransac_reproj_threshold,
482
- ransac_confidence,
483
- ransac_max_iter,
484
- geometry_type,
485
- )
486
- elif ransac_method.startswith("POSELIB"):
487
- logger.info(f"ransac_method: {ransac_method}, geometry_type: {geometry_type}")
488
- return _filter_matches_poselib(
489
- mkpts0,
490
- mkpts1,
491
- None,
492
- ransac_reproj_threshold,
493
- ransac_confidence,
494
- ransac_max_iter,
495
- geometry_type,
496
- )
497
- else:
498
- raise NotImplementedError
499
-
500
-
501
- def filter_matches(
502
- pred: Dict[str, Any],
503
- ransac_method: str = DEFAULT_RANSAC_METHOD,
504
- ransac_reproj_threshold: float = DEFAULT_RANSAC_REPROJ_THRESHOLD,
505
- ransac_confidence: float = DEFAULT_RANSAC_CONFIDENCE,
506
- ransac_max_iter: int = DEFAULT_RANSAC_MAX_ITER,
507
- ransac_estimator: str = None,
508
- ):
509
- """
510
- Filter matches using RANSAC. If keypoints are available, filter by keypoints.
511
- If lines are available, filter by lines. If both keypoints and lines are
512
- available, filter by keypoints.
513
-
514
- Args:
515
- pred (Dict[str, Any]): dict of matches, including original keypoints.
516
- ransac_method (str, optional): RANSAC method. Defaults to DEFAULT_RANSAC_METHOD.
517
- ransac_reproj_threshold (float, optional): RANSAC reprojection threshold. Defaults to DEFAULT_RANSAC_REPROJ_THRESHOLD.
518
- ransac_confidence (float, optional): RANSAC confidence. Defaults to DEFAULT_RANSAC_CONFIDENCE.
519
- ransac_max_iter (int, optional): RANSAC maximum iterations. Defaults to DEFAULT_RANSAC_MAX_ITER.
520
-
521
- Returns:
522
- Dict[str, Any]: filtered matches.
523
- """
524
- mkpts0: Optional[np.ndarray] = None
525
- mkpts1: Optional[np.ndarray] = None
526
- feature_type: Optional[str] = None
527
- if "mkeypoints0_orig" in pred.keys() and "mkeypoints1_orig" in pred.keys():
528
- mkpts0 = pred["mkeypoints0_orig"]
529
- mkpts1 = pred["mkeypoints1_orig"]
530
- feature_type = "KEYPOINT"
531
- elif (
532
- "line_keypoints0_orig" in pred.keys() and "line_keypoints1_orig" in pred.keys()
533
- ):
534
- mkpts0 = pred["line_keypoints0_orig"]
535
- mkpts1 = pred["line_keypoints1_orig"]
536
- feature_type = "LINE"
537
- else:
538
- return set_null_pred(feature_type, pred)
539
- if mkpts0 is None or mkpts0 is None:
540
- return set_null_pred(feature_type, pred)
541
- if ransac_method not in ransac_zoo.keys():
542
- ransac_method = DEFAULT_RANSAC_METHOD
543
-
544
- if len(mkpts0) < DEFAULT_MIN_NUM_MATCHES:
545
- return set_null_pred(feature_type, pred)
546
-
547
- geom_info = compute_geometry(
548
- pred,
549
- ransac_method=ransac_method,
550
- ransac_reproj_threshold=ransac_reproj_threshold,
551
- ransac_confidence=ransac_confidence,
552
- ransac_max_iter=ransac_max_iter,
553
- )
554
-
555
- if "Homography" in geom_info.keys():
556
- mask = geom_info["mask_h"]
557
- if feature_type == "KEYPOINT":
558
- pred["mmkeypoints0_orig"] = mkpts0[mask]
559
- pred["mmkeypoints1_orig"] = mkpts1[mask]
560
- pred["mmconf"] = pred["mconf"][mask]
561
- elif feature_type == "LINE":
562
- pred["mline_keypoints0_orig"] = mkpts0[mask]
563
- pred["mline_keypoints1_orig"] = mkpts1[mask]
564
- pred["H"] = np.array(geom_info["Homography"])
565
- else:
566
- set_null_pred(feature_type, pred)
567
- # do not show mask
568
- geom_info.pop("mask_h", None)
569
- geom_info.pop("mask_f", None)
570
- pred["geom_info"] = geom_info
571
- return pred
572
-
573
-
574
- def compute_geometry(
575
- pred: Dict[str, Any],
576
- ransac_method: str = DEFAULT_RANSAC_METHOD,
577
- ransac_reproj_threshold: float = DEFAULT_RANSAC_REPROJ_THRESHOLD,
578
- ransac_confidence: float = DEFAULT_RANSAC_CONFIDENCE,
579
- ransac_max_iter: int = DEFAULT_RANSAC_MAX_ITER,
580
- ) -> Dict[str, List[float]]:
581
- """
582
- Compute geometric information of matches, including Fundamental matrix,
583
- Homography matrix, and rectification matrices (if available).
584
-
585
- Args:
586
- pred (Dict[str, Any]): dict of matches, including original keypoints.
587
- ransac_method (str, optional): RANSAC method. Defaults to DEFAULT_RANSAC_METHOD.
588
- ransac_reproj_threshold (float, optional): RANSAC reprojection threshold. Defaults to DEFAULT_RANSAC_REPROJ_THRESHOLD.
589
- ransac_confidence (float, optional): RANSAC confidence. Defaults to DEFAULT_RANSAC_CONFIDENCE.
590
- ransac_max_iter (int, optional): RANSAC maximum iterations. Defaults to DEFAULT_RANSAC_MAX_ITER.
591
-
592
- Returns:
593
- Dict[str, List[float]]: geometric information in form of a dict.
594
- """
595
- mkpts0: Optional[np.ndarray] = None
596
- mkpts1: Optional[np.ndarray] = None
597
-
598
- if "mkeypoints0_orig" in pred.keys() and "mkeypoints1_orig" in pred.keys():
599
- mkpts0 = pred["mkeypoints0_orig"]
600
- mkpts1 = pred["mkeypoints1_orig"]
601
- elif (
602
- "line_keypoints0_orig" in pred.keys() and "line_keypoints1_orig" in pred.keys()
603
- ):
604
- mkpts0 = pred["line_keypoints0_orig"]
605
- mkpts1 = pred["line_keypoints1_orig"]
606
-
607
- if mkpts0 is not None and mkpts1 is not None:
608
- if len(mkpts0) < 2 * DEFAULT_MIN_NUM_MATCHES:
609
- return {}
610
- geo_info: Dict[str, List[float]] = {}
611
-
612
- F, mask_f = proc_ransac_matches(
613
- mkpts0,
614
- mkpts1,
615
- ransac_method,
616
- ransac_reproj_threshold,
617
- ransac_confidence,
618
- ransac_max_iter,
619
- geometry_type="Fundamental",
620
- )
621
-
622
- if F is not None:
623
- geo_info["Fundamental"] = F.tolist()
624
- geo_info["mask_f"] = mask_f
625
- H, mask_h = proc_ransac_matches(
626
- mkpts1,
627
- mkpts0,
628
- ransac_method,
629
- ransac_reproj_threshold,
630
- ransac_confidence,
631
- ransac_max_iter,
632
- geometry_type="Homography",
633
- )
634
-
635
- h0, w0, _ = pred["image0_orig"].shape
636
- if H is not None:
637
- geo_info["Homography"] = H.tolist()
638
- geo_info["mask_h"] = mask_h
639
- try:
640
- _, H1, H2 = cv2.stereoRectifyUncalibrated(
641
- mkpts0.reshape(-1, 2),
642
- mkpts1.reshape(-1, 2),
643
- F,
644
- imgSize=(w0, h0),
645
- )
646
- geo_info["H1"] = H1.tolist()
647
- geo_info["H2"] = H2.tolist()
648
- except cv2.error as e:
649
- logger.error(f"StereoRectifyUncalibrated failed, skip! error: {e}")
650
- return geo_info
651
- else:
652
- return {}
653
-
654
-
655
- def wrap_images(
656
- img0: np.ndarray,
657
- img1: np.ndarray,
658
- geo_info: Optional[Dict[str, List[float]]],
659
- geom_type: str,
660
- ) -> Tuple[Optional[str], Optional[Dict[str, List[float]]]]:
661
- """
662
- Wraps the images based on the geometric transformation used to align them.
663
-
664
- Args:
665
- img0: numpy array representing the first image.
666
- img1: numpy array representing the second image.
667
- geo_info: dictionary containing the geometric transformation information.
668
- geom_type: type of geometric transformation used to align the images.
669
-
670
- Returns:
671
- A tuple containing a base64 encoded image string and a dictionary with the transformation matrix.
672
- """
673
- h0, w0, _ = img0.shape
674
- h1, w1, _ = img1.shape
675
- if geo_info is not None and len(geo_info) != 0:
676
- rectified_image0 = img0
677
- rectified_image1 = None
678
- if "Homography" not in geo_info:
679
- logger.warning(f"{geom_type} not exist, maybe too less matches")
680
- return None, None
681
-
682
- H = np.array(geo_info["Homography"])
683
-
684
- title: List[str] = []
685
- if geom_type == "Homography":
686
- rectified_image1 = cv2.warpPerspective(img1, H, (w0, h0))
687
- title = ["Image 0", "Image 1 - warped"]
688
- elif geom_type == "Fundamental":
689
- if geom_type not in geo_info:
690
- logger.warning(f"{geom_type} not exist, maybe too less matches")
691
- return None, None
692
- else:
693
- H1, H2 = np.array(geo_info["H1"]), np.array(geo_info["H2"])
694
- rectified_image0 = cv2.warpPerspective(img0, H1, (w0, h0))
695
- rectified_image1 = cv2.warpPerspective(img1, H2, (w1, h1))
696
- title = ["Image 0 - warped", "Image 1 - warped"]
697
- else:
698
- print("Error: Unknown geometry type")
699
- fig = plot_images(
700
- [rectified_image0.squeeze(), rectified_image1.squeeze()],
701
- title,
702
- dpi=300,
703
- )
704
- return fig2im(fig), rectified_image1
705
- else:
706
- return None, None
707
-
708
-
709
- def generate_warp_images(
710
- input_image0: np.ndarray,
711
- input_image1: np.ndarray,
712
- matches_info: Dict[str, Any],
713
- choice: str,
714
- ) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
715
- """
716
- Changes the estimate of the geometric transformation used to align the images.
717
-
718
- Args:
719
- input_image0: First input image.
720
- input_image1: Second input image.
721
- matches_info: Dictionary containing information about the matches.
722
- choice: Type of geometric transformation to use ('Homography' or 'Fundamental') or 'No' to disable.
723
-
724
- Returns:
725
- A tuple containing the updated images and the warpped images.
726
- """
727
- if (
728
- matches_info is None
729
- or len(matches_info) < 1
730
- or "geom_info" not in matches_info.keys()
731
- ):
732
- return None, None
733
- geom_info = matches_info["geom_info"]
734
- warped_image = None
735
- if choice != "No":
736
- wrapped_image_pair, warped_image = wrap_images(
737
- input_image0, input_image1, geom_info, choice
738
- )
739
- return wrapped_image_pair, warped_image
740
- else:
741
- return None, None
742
-
743
-
744
- def send_to_match(state_cache: Dict[str, Any]):
745
- """
746
- Send the state cache to the match function.
747
-
748
- Args:
749
- state_cache (Dict[str, Any]): Current state of the app.
750
-
751
- Returns:
752
- None
753
- """
754
- if state_cache:
755
- return (
756
- state_cache["image0_orig"],
757
- state_cache["wrapped_image"],
758
- )
759
- else:
760
- return None, None
761
-
762
-
763
- def run_ransac(
764
- state_cache: Dict[str, Any],
765
- choice_geometry_type: str,
766
- ransac_method: str = DEFAULT_RANSAC_METHOD,
767
- ransac_reproj_threshold: int = DEFAULT_RANSAC_REPROJ_THRESHOLD,
768
- ransac_confidence: float = DEFAULT_RANSAC_CONFIDENCE,
769
- ransac_max_iter: int = DEFAULT_RANSAC_MAX_ITER,
770
- ) -> Tuple[Optional[np.ndarray], Optional[Dict[str, int]]]:
771
- """
772
- Run RANSAC matches and return the output images and the number of matches.
773
-
774
- Args:
775
- state_cache (Dict[str, Any]): Current state of the app, including the matches.
776
- ransac_method (str, optional): RANSAC method. Defaults to DEFAULT_RANSAC_METHOD.
777
- ransac_reproj_threshold (int, optional): RANSAC reprojection threshold. Defaults to DEFAULT_RANSAC_REPROJ_THRESHOLD.
778
- ransac_confidence (float, optional): RANSAC confidence. Defaults to DEFAULT_RANSAC_CONFIDENCE.
779
- ransac_max_iter (int, optional): RANSAC maximum iterations. Defaults to DEFAULT_RANSAC_MAX_ITER.
780
-
781
- Returns:
782
- Tuple[Optional[np.ndarray], Optional[Dict[str, int]]]: Tuple containing the output images and the number of matches.
783
- """
784
- if not state_cache:
785
- logger.info("Run Match first before Rerun RANSAC")
786
- gr.Warning("Run Match first before Rerun RANSAC")
787
- return None, None
788
- t1 = time.time()
789
- logger.info(
790
- f"Run RANSAC matches using: {ransac_method} with threshold: {ransac_reproj_threshold}"
791
- )
792
- logger.info(
793
- f"Run RANSAC matches using: {ransac_confidence} with iter: {ransac_max_iter}"
794
- )
795
- # if enable_ransac:
796
- filter_matches(
797
- state_cache,
798
- ransac_method=ransac_method,
799
- ransac_reproj_threshold=ransac_reproj_threshold,
800
- ransac_confidence=ransac_confidence,
801
- ransac_max_iter=ransac_max_iter,
802
- )
803
- logger.info(f"RANSAC matches done using: {time.time()-t1:.3f}s")
804
- t1 = time.time()
805
-
806
- # plot images with ransac matches
807
- titles = [
808
- "Image 0 - Ransac matched keypoints",
809
- "Image 1 - Ransac matched keypoints",
810
- ]
811
- output_matches_ransac, num_matches_ransac = display_matches(
812
- state_cache, titles=titles, tag="KPTS_RANSAC"
813
- )
814
- logger.info(f"Display matches done using: {time.time()-t1:.3f}s")
815
- t1 = time.time()
816
-
817
- # compute warp images
818
- output_wrapped, warped_image = generate_warp_images(
819
- state_cache["image0_orig"],
820
- state_cache["image1_orig"],
821
- state_cache,
822
- choice_geometry_type,
823
- )
824
- plt.close("all")
825
-
826
- num_matches_raw = state_cache["num_matches_raw"]
827
- state_cache["wrapped_image"] = warped_image
828
-
829
- # tmp_state_cache = tempfile.NamedTemporaryFile(suffix='.pkl', delete=False)
830
- tmp_state_cache = "output.pkl"
831
- with open(tmp_state_cache, "wb") as f:
832
- pickle.dump(state_cache, f)
833
-
834
- logger.info("Dump results done!")
835
-
836
- return (
837
- output_matches_ransac,
838
- {
839
- "num_matches_raw": num_matches_raw,
840
- "num_matches_ransac": num_matches_ransac,
841
- },
842
- output_wrapped,
843
- tmp_state_cache,
844
- )
845
-
846
-
847
- def generate_fake_outputs(
848
- output_keypoints,
849
- output_matches_raw,
850
- output_matches_ransac,
851
- match_conf,
852
- extract_conf,
853
- pred,
854
- ):
855
- return (
856
- output_keypoints,
857
- output_matches_raw,
858
- output_matches_ransac,
859
- {},
860
- {
861
- "match_conf": match_conf,
862
- "extractor_conf": extract_conf,
863
- },
864
- {
865
- "geom_info": pred.get("geom_info", {}),
866
- },
867
- None,
868
- None,
869
- None,
870
- )
871
-
872
-
873
- def run_matching(
874
- image0: np.ndarray,
875
- image1: np.ndarray,
876
- match_threshold: float,
877
- extract_max_keypoints: int,
878
- keypoint_threshold: float,
879
- key: str,
880
- ransac_method: str = DEFAULT_RANSAC_METHOD,
881
- ransac_reproj_threshold: int = DEFAULT_RANSAC_REPROJ_THRESHOLD,
882
- ransac_confidence: float = DEFAULT_RANSAC_CONFIDENCE,
883
- ransac_max_iter: int = DEFAULT_RANSAC_MAX_ITER,
884
- choice_geometry_type: str = DEFAULT_SETTING_GEOMETRY,
885
- matcher_zoo: Dict[str, Any] = None,
886
- force_resize: bool = False,
887
- image_width: int = 640,
888
- image_height: int = 480,
889
- use_cached_model: bool = False,
890
- ) -> Tuple[
891
- np.ndarray,
892
- np.ndarray,
893
- np.ndarray,
894
- Dict[str, int],
895
- Dict[str, Dict[str, Any]],
896
- Dict[str, Dict[str, float]],
897
- np.ndarray,
898
- ]:
899
- """Match two images using the given parameters.
900
-
901
- Args:
902
- image0 (np.ndarray): RGB image 0.
903
- image1 (np.ndarray): RGB image 1.
904
- match_threshold (float): match threshold.
905
- extract_max_keypoints (int): number of keypoints to extract.
906
- keypoint_threshold (float): keypoint threshold.
907
- key (str): key of the model to use.
908
- ransac_method (str, optional): RANSAC method to use.
909
- ransac_reproj_threshold (int, optional): RANSAC reprojection threshold.
910
- ransac_confidence (float, optional): RANSAC confidence level.
911
- ransac_max_iter (int, optional): RANSAC maximum number of iterations.
912
- choice_geometry_type (str, optional): setting of geometry estimation.
913
- matcher_zoo (Dict[str, Any], optional): matcher zoo. Defaults to None.
914
- force_resize (bool, optional): force resize. Defaults to False.
915
- image_width (int, optional): image width. Defaults to 640.
916
- image_height (int, optional): image height. Defaults to 480.
917
- use_cached_model (bool, optional): use cached model. Defaults to False.
918
-
919
- Returns:
920
- tuple:
921
- - output_keypoints (np.ndarray): image with keypoints.
922
- - output_matches_raw (np.ndarray): image with raw matches.
923
- - output_matches_ransac (np.ndarray): image with RANSAC matches.
924
- - num_matches (Dict[str, int]): number of raw and RANSAC matches.
925
- - configs (Dict[str, Dict[str, Any]]): match and feature extraction configs.
926
- - geom_info (Dict[str, Dict[str, float]]): geometry information.
927
- - output_wrapped (np.ndarray): wrapped images.
928
- """
929
- # image0 and image1 is RGB mode
930
- if image0 is None or image1 is None:
931
- logger.error(
932
- "Error: No images found! Please upload two images or select an example."
933
- )
934
- raise gr.Error(
935
- "Error: No images found! Please upload two images or select an example."
936
- )
937
- # init output
938
- output_keypoints = None
939
- output_matches_raw = None
940
- output_matches_ransac = None
941
-
942
- t0 = time.time()
943
- model = matcher_zoo[key]
944
- match_conf = model["matcher"]
945
- # update match config
946
- match_conf["model"]["match_threshold"] = match_threshold
947
- match_conf["model"]["max_keypoints"] = extract_max_keypoints
948
- cache_key = "{}_{}".format(key, match_conf["model"]["name"])
949
-
950
- efficiency = model["info"].get("efficiency", "high")
951
- if efficiency == "low":
952
- gr.Warning(
953
- "Matcher {} is time-consuming, please wait for a while".format(
954
- model["info"].get("name", "unknown")
955
- )
956
- )
957
-
958
- if use_cached_model:
959
- # because of the model cache, we need to update the config
960
- matcher = model_cache.cache_model(cache_key, get_model, match_conf)
961
- matcher.conf["max_keypoints"] = extract_max_keypoints
962
- matcher.conf["match_threshold"] = match_threshold
963
- logger.info(f"Loaded cached model {cache_key}")
964
- else:
965
- matcher = get_model(match_conf)
966
- logger.info(f"Loading model using: {time.time()-t0:.3f}s")
967
- t1 = time.time()
968
- yield generate_fake_outputs(
969
- output_keypoints, output_matches_raw, output_matches_ransac, match_conf, {}, {}
970
- )
971
-
972
- if model["dense"]:
973
- if not match_conf["preprocessing"].get("force_resize", False):
974
- match_conf["preprocessing"]["force_resize"] = force_resize
975
- else:
976
- logger.info("preprocessing is already resized")
977
- if force_resize:
978
- match_conf["preprocessing"]["height"] = image_height
979
- match_conf["preprocessing"]["width"] = image_width
980
- logger.info(f"Force resize to {image_width}x{image_height}")
981
-
982
- pred = match_dense.match_images(
983
- matcher, image0, image1, match_conf["preprocessing"], device=DEVICE
984
- )
985
- del matcher
986
- extract_conf = None
987
- else:
988
- extract_conf = model["feature"]
989
- # update extract config
990
- extract_conf["model"]["max_keypoints"] = extract_max_keypoints
991
- extract_conf["model"]["keypoint_threshold"] = keypoint_threshold
992
- cache_key = "{}_{}".format(key, extract_conf["model"]["name"])
993
-
994
- if use_cached_model:
995
- extractor = model_cache.cache_model(
996
- cache_key, get_feature_model, extract_conf
997
- )
998
- # because of the model cache, we need to update the config
999
- extractor.conf["max_keypoints"] = extract_max_keypoints
1000
- extractor.conf["keypoint_threshold"] = keypoint_threshold
1001
- logger.info(f"Loaded cached model {cache_key}")
1002
- else:
1003
- extractor = get_feature_model(extract_conf)
1004
-
1005
- if not extract_conf["preprocessing"].get("force_resize", False):
1006
- extract_conf["preprocessing"]["force_resize"] = force_resize
1007
- else:
1008
- logger.info("preprocessing is already resized")
1009
- if force_resize:
1010
- extract_conf["preprocessing"]["height"] = image_height
1011
- extract_conf["preprocessing"]["width"] = image_width
1012
- logger.info(f"Force resize to {image_width}x{image_height}")
1013
-
1014
- pred0 = extract_features.extract(
1015
- extractor, image0, extract_conf["preprocessing"]
1016
- )
1017
- pred1 = extract_features.extract(
1018
- extractor, image1, extract_conf["preprocessing"]
1019
- )
1020
- pred = match_features.match_images(matcher, pred0, pred1)
1021
- del extractor
1022
- # gr.Info(
1023
- # f"Matching images done using: {time.time()-t1:.3f}s",
1024
- # )
1025
- logger.info(f"Matching images done using: {time.time()-t1:.3f}s")
1026
- t1 = time.time()
1027
-
1028
- # plot images with keypoints
1029
- titles = [
1030
- "Image 0 - Keypoints",
1031
- "Image 1 - Keypoints",
1032
- ]
1033
- output_keypoints = display_keypoints(pred, titles=titles)
1034
- yield generate_fake_outputs(
1035
- output_keypoints,
1036
- output_matches_raw,
1037
- output_matches_ransac,
1038
- match_conf,
1039
- extract_conf,
1040
- pred,
1041
- )
1042
-
1043
- # plot images with raw matches
1044
- titles = [
1045
- "Image 0 - Raw matched keypoints",
1046
- "Image 1 - Raw matched keypoints",
1047
- ]
1048
- output_matches_raw, num_matches_raw = display_matches(pred, titles=titles)
1049
- yield generate_fake_outputs(
1050
- output_keypoints,
1051
- output_matches_raw,
1052
- output_matches_ransac,
1053
- match_conf,
1054
- extract_conf,
1055
- pred,
1056
- )
1057
-
1058
- # if enable_ransac:
1059
- filter_matches(
1060
- pred,
1061
- ransac_method=ransac_method,
1062
- ransac_reproj_threshold=ransac_reproj_threshold,
1063
- ransac_confidence=ransac_confidence,
1064
- ransac_max_iter=ransac_max_iter,
1065
- )
1066
-
1067
- # gr.Info(f"RANSAC matches done using: {time.time()-t1:.3f}s")
1068
- logger.info(f"RANSAC matches done using: {time.time()-t1:.3f}s")
1069
- t1 = time.time()
1070
-
1071
- # plot images with ransac matches
1072
- titles = [
1073
- "Image 0 - Ransac matched keypoints",
1074
- "Image 1 - Ransac matched keypoints",
1075
- ]
1076
- output_matches_ransac, num_matches_ransac = display_matches(
1077
- pred, titles=titles, tag="KPTS_RANSAC"
1078
- )
1079
- yield generate_fake_outputs(
1080
- output_keypoints,
1081
- output_matches_raw,
1082
- output_matches_ransac,
1083
- match_conf,
1084
- extract_conf,
1085
- pred,
1086
- )
1087
-
1088
- # gr.Info(f"Display matches done using: {time.time()-t1:.3f}s")
1089
- logger.info(f"Display matches done using: {time.time()-t1:.3f}s")
1090
- t1 = time.time()
1091
- # plot wrapped images
1092
- output_wrapped, warped_image = generate_warp_images(
1093
- pred["image0_orig"],
1094
- pred["image1_orig"],
1095
- pred,
1096
- choice_geometry_type,
1097
- )
1098
- plt.close("all")
1099
- # gr.Info(f"In summary, total time: {time.time()-t0:.3f}s")
1100
- logger.info(f"TOTAL time: {time.time()-t0:.3f}s")
1101
-
1102
- state_cache = pred
1103
- state_cache["num_matches_raw"] = num_matches_raw
1104
- state_cache["num_matches_ransac"] = num_matches_ransac
1105
- state_cache["wrapped_image"] = warped_image
1106
-
1107
- # tmp_state_cache = tempfile.NamedTemporaryFile(suffix='.pkl', delete=False)
1108
- tmp_state_cache = "output.pkl"
1109
- with open(tmp_state_cache, "wb") as f:
1110
- pickle.dump(state_cache, f)
1111
- logger.info("Dump results done!")
1112
-
1113
- yield (
1114
- output_keypoints,
1115
- output_matches_raw,
1116
- output_matches_ransac,
1117
- {
1118
- "num_raw_matches": num_matches_raw,
1119
- "num_ransac_matches": num_matches_ransac,
1120
- },
1121
- {
1122
- "match_conf": match_conf,
1123
- "extractor_conf": extract_conf,
1124
- },
1125
- {
1126
- "geom_info": pred.get("geom_info", {}),
1127
- },
1128
- output_wrapped,
1129
- state_cache,
1130
- tmp_state_cache,
1131
- )
1132
-
1133
-
1134
- # @ref: https://docs.opencv.org/4.x/d0/d74/md__build_4_x-contrib_docs-lin64_opencv_doc_tutorials_calib3d_usac.html
1135
- # AND: https://opencv.org/blog/2021/06/09/evaluating-opencvs-new-ransacs
1136
- ransac_zoo = {
1137
- "POSELIB": "LO-RANSAC",
1138
- "CV2_RANSAC": cv2.RANSAC,
1139
- "CV2_USAC_MAGSAC": cv2.USAC_MAGSAC,
1140
- "CV2_USAC_DEFAULT": cv2.USAC_DEFAULT,
1141
- "CV2_USAC_FM_8PTS": cv2.USAC_FM_8PTS,
1142
- "CV2_USAC_PROSAC": cv2.USAC_PROSAC,
1143
- "CV2_USAC_FAST": cv2.USAC_FAST,
1144
- "CV2_USAC_ACCURATE": cv2.USAC_ACCURATE,
1145
- "CV2_USAC_PARALLEL": cv2.USAC_PARALLEL,
1146
- }
1147
-
1148
-
1149
- def rotate_image(input_path, degrees, output_path):
1150
- img = Image.open(input_path)
1151
- img_rotated = img.rotate(-degrees)
1152
- img_rotated.save(output_path)
1153
-
1154
-
1155
- def scale_image(input_path, scale_factor, output_path):
1156
- img = Image.open(input_path)
1157
- width, height = img.size
1158
- new_width = int(width * scale_factor)
1159
- new_height = int(height * scale_factor)
1160
- new_img = Image.new("RGB", (width, height), (0, 0, 0))
1161
- img_resized = img.resize((new_width, new_height))
1162
- position = ((width - new_width) // 2, (height - new_height) // 2)
1163
- new_img.paste(img_resized, position)
1164
- new_img.save(output_path)
 
1
+ import os
2
+ import pickle
3
+ import random
4
+ import time
5
+ import warnings
6
+ from itertools import combinations
7
+ from pathlib import Path
8
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
9
+ from datasets import load_dataset
10
+
11
+ import cv2
12
+ import gradio as gr
13
+ import matplotlib.pyplot as plt
14
+ import numpy as np
15
+ import poselib
16
+ from PIL import Image
17
+
18
+ from ..hloc import (
19
+ DEVICE,
20
+ extract_features,
21
+ extractors,
22
+ logger,
23
+ match_dense,
24
+ match_features,
25
+ matchers,
26
+ DATASETS_REPO_ID,
27
+ )
28
+ from ..hloc.utils.base_model import dynamic_load
29
+ from .viz import display_keypoints, display_matches, fig2im, plot_images
30
+ from .modelcache import ARCSizeAwareModelCache as ModelCache
31
+
32
+ warnings.simplefilter("ignore")
33
+
34
+ ROOT = Path(__file__).parents[1]
35
+ # some default values
36
+ DEFAULT_SETTING_THRESHOLD = 0.1
37
+ DEFAULT_SETTING_MAX_FEATURES = 2000
38
+ DEFAULT_DEFAULT_KEYPOINT_THRESHOLD = 0.01
39
+ DEFAULT_ENABLE_RANSAC = True
40
+ DEFAULT_RANSAC_METHOD = "CV2_USAC_MAGSAC"
41
+ DEFAULT_RANSAC_REPROJ_THRESHOLD = 8
42
+ DEFAULT_RANSAC_CONFIDENCE = 0.9999
43
+ DEFAULT_RANSAC_MAX_ITER = 10000
44
+ DEFAULT_MIN_NUM_MATCHES = 4
45
+ DEFAULT_MATCHING_THRESHOLD = 0.2
46
+ DEFAULT_SETTING_GEOMETRY = "Homography"
47
+ GRADIO_VERSION = gr.__version__.split(".")[0]
48
+ MATCHER_ZOO = None
49
+
50
+
51
+ model_cache = ModelCache()
52
+
53
+
54
+ def load_config(config_name: str) -> Dict[str, Any]:
55
+ """
56
+ Load a YAML configuration file.
57
+
58
+ Args:
59
+ config_name: The path to the YAML configuration file.
60
+
61
+ Returns:
62
+ The configuration dictionary, with string keys and arbitrary values.
63
+ """
64
+ import yaml
65
+
66
+ with open(config_name, "r") as stream:
67
+ try:
68
+ config: Dict[str, Any] = yaml.safe_load(stream)
69
+ except yaml.YAMLError as exc:
70
+ logger.error(exc)
+ config = {}
71
+ return config
72
+
73
+
74
+ def get_matcher_zoo(
75
+ matcher_zoo: Dict[str, Dict[str, Union[str, bool]]],
76
+ ) -> Dict[str, Dict[str, Union[Callable, bool]]]:
77
+ """
78
+ Restore matcher configurations from a dictionary.
79
+
80
+ Args:
81
+ matcher_zoo: A dictionary with the matcher configurations,
82
+ where the configuration is a dictionary as loaded from a YAML file.
83
+
84
+ Returns:
85
+ A dictionary with the matcher configurations, where the configuration is
86
+ a function or a function instead of a string.
87
+ """
88
+ matcher_zoo_restored = {}
89
+ for k, v in matcher_zoo.items():
90
+ matcher_zoo_restored[k] = parse_match_config(v)
91
+ return matcher_zoo_restored
92
+
93
+
94
+ def parse_match_config(conf):
95
+ if conf["dense"]:
96
+ return {
97
+ "matcher": match_dense.confs.get(conf["matcher"]),
98
+ "dense": True,
99
+ "info": conf.get("info", {}),
100
+ }
101
+ else:
102
+ return {
103
+ "feature": extract_features.confs.get(conf["feature"]),
104
+ "matcher": match_features.confs.get(conf["matcher"]),
105
+ "dense": False,
106
+ "info": conf.get("info", {}),
107
+ }
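# A minimal sketch of a matcher_zoo entry as it would look after load_config();
# the entry name and the "superpoint_aachen" / "superpoint-lightglue" config
# keys are assumptions for illustration, not taken from the shipped YAML.
example_zoo = {
    "superpoint+lightglue": {
        "dense": False,
        "feature": "superpoint_aachen",
        "matcher": "superpoint-lightglue",
        "info": {"name": "SuperPoint+LightGlue", "efficiency": "high"},
    }
}
restored = get_matcher_zoo(example_zoo)
# restored["superpoint+lightglue"]["feature"] / ["matcher"] now hold the config
# dicts looked up in extract_features.confs / match_features.confs (or None
# when a key is unknown).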
108
+
109
+
110
+ def get_model(match_conf: Dict[str, Any]):
111
+ """
112
+ Load a matcher model from the provided configuration.
113
+
114
+ Args:
115
+ match_conf: A dictionary containing the model configuration.
116
+
117
+ Returns:
118
+ A matcher model instance.
119
+ """
120
+ Model = dynamic_load(matchers, match_conf["model"]["name"])
121
+ model = Model(match_conf["model"]).eval().to(DEVICE)
122
+ return model
123
+
124
+
125
+ def get_feature_model(conf: Dict[str, Dict[str, Any]]):
126
+ """
127
+ Load a feature extraction model from the provided configuration.
128
+
129
+ Args:
130
+ conf: A dictionary containing the model configuration.
131
+
132
+ Returns:
133
+ A feature extraction model instance.
134
+ """
135
+ Model = dynamic_load(extractors, conf["model"]["name"])
136
+ model = Model(conf["model"]).eval().to(DEVICE)
137
+ return model
138
+
139
+
140
+ def download_example_images(repo_id, output_dir):
141
+ logger.info(f"Download example dataset from huggingface: {repo_id}")
142
+ dataset = load_dataset(repo_id)
143
+ Path(output_dir).mkdir(parents=True, exist_ok=True)
144
+ for example in dataset["train"]: # Assuming the dataset is in the "train" split
145
+ file_path = example["path"]
146
+ image = example["image"] # Access the PIL.Image object directly
147
+ full_path = os.path.join(output_dir, file_path)
148
+ Path(os.path.dirname(full_path)).mkdir(parents=True, exist_ok=True)
149
+ image.save(full_path)
150
+ logger.info(f"Images saved to {output_dir} successfully.")
151
+ return Path(output_dir)
152
+
153
+
154
+ def gen_examples(data_root: Path):
155
+ random.seed(1)
156
+ example_matchers = [
157
+ "disk+lightglue",
158
+ "xfeat(sparse)",
159
+ "dedode",
160
+ "loftr",
161
+ "disk",
162
+ "RoMa",
163
+ "d2net",
164
+ "aspanformer",
165
+ "topicfm",
166
+ "superpoint+superglue",
167
+ "superpoint+lightglue",
168
+ "superpoint+mnn",
169
+ "disk",
170
+ ]
171
+ data_root = Path(data_root)
172
+ if not Path(data_root).exists():
173
+ try:
174
+ download_example_images(DATASETS_REPO_ID, data_root)
175
+ except Exception as e:
176
+ logger.error(f"download_example_images error : {e}")
177
+ data_root = ROOT / "datasets"
178
+ if not Path(data_root / "sacre_coeur/mapping").exists():
179
+ download_example_images(DATASETS_REPO_ID, data_root)
180
+
181
+ def distribute_elements(A, B):
182
+ new_B = np.array(B, copy=True).flatten()
183
+ np.random.shuffle(new_B)
184
+ new_B = np.resize(new_B, len(A))
185
+ np.random.shuffle(new_B)
186
+ return new_B.tolist()
187
+
188
+ # normal examples
189
+ def gen_images_pairs(count: int = 5):
190
+ path = str(data_root / "sacre_coeur/mapping")
191
+ imgs_list = [
192
+ os.path.join(path, file)
193
+ for file in os.listdir(path)
194
+ if file.lower().endswith((".jpg", ".jpeg", ".png"))
195
+ ]
196
+ pairs = list(combinations(imgs_list, 2))
197
+ if len(pairs) < count:
198
+ count = len(pairs)
199
+ selected = random.sample(range(len(pairs)), count)
200
+ return [pairs[i] for i in selected]
201
+
202
+ # rotated examples
203
+ def gen_rot_image_pairs(count: int = 5):
204
+ path = data_root / "sacre_coeur/mapping"
205
+ path_rot = data_root / "sacre_coeur/mapping_rot"
206
+ rot_list = [45, 180, 90, 225, 270]
207
+ pairs = []
208
+ for file in os.listdir(path):
209
+ if file.lower().endswith((".jpg", ".jpeg", ".png")):
210
+ for rot in rot_list:
211
+ file_rot = "{}_rot{}.jpg".format(Path(file).stem, rot)
212
+ if (path_rot / file_rot).exists():
213
+ pairs.append(
214
+ [
215
+ path / file,
216
+ path_rot / file_rot,
217
+ ]
218
+ )
219
+ if len(pairs) < count:
220
+ count = len(pairs)
221
+ selected = random.sample(range(len(pairs)), count)
222
+ return [pairs[i] for i in selected]
223
+
224
+ def gen_scale_image_pairs(count: int = 5):
225
+ path = data_root / "sacre_coeur/mapping"
226
+ path_scale = data_root / "sacre_coeur/mapping_scale"
227
+ scale_list = [0.3, 0.5]
228
+ pairs = []
229
+ for file in os.listdir(path):
230
+ if file.lower().endswith((".jpg", ".jpeg", ".png")):
231
+ for scale in scale_list:
232
+ file_scale = "{}_scale{}.jpg".format(Path(file).stem, scale)
233
+ if (path_scale / file_scale).exists():
234
+ pairs.append(
235
+ [
236
+ path / file,
237
+ path_scale / file_scale,
238
+ ]
239
+ )
240
+ if len(pairs) < count:
241
+ count = len(pairs)
242
+ selected = random.sample(range(len(pairs)), count)
243
+ return [pairs[i] for i in selected]
244
+
245
+ # extremely hard examples
246
+ def gen_image_pairs_wxbs(count: int = None):
247
+ prefix = "wxbs_benchmark/.WxBS/v1.1"
248
+ wxbs_path = data_root / prefix
249
+ pairs = []
250
+ for catg in os.listdir(wxbs_path):
251
+ catg_path = wxbs_path / catg
252
+ if not catg_path.is_dir():
253
+ continue
254
+ for scene in os.listdir(catg_path):
255
+ scene_path = catg_path / scene
256
+ if not scene_path.is_dir():
257
+ continue
258
+ img1_path = scene_path / "01.png"
259
+ img2_path = scene_path / "02.png"
260
+ if img1_path.exists() and img2_path.exists():
261
+ pairs.append([str(img1_path), str(img2_path)])
262
+ return pairs
263
+
264
+ # image pair path
265
+ pairs = gen_images_pairs()
266
+ pairs += gen_rot_image_pairs()
267
+ pairs += gen_scale_image_pairs()
268
+ pairs += gen_image_pairs_wxbs()
269
+
270
+ match_setting_threshold = DEFAULT_SETTING_THRESHOLD
271
+ match_setting_max_features = DEFAULT_SETTING_MAX_FEATURES
272
+ detect_keypoints_threshold = DEFAULT_DEFAULT_KEYPOINT_THRESHOLD
273
+ ransac_method = DEFAULT_RANSAC_METHOD
274
+ ransac_reproj_threshold = DEFAULT_RANSAC_REPROJ_THRESHOLD
275
+ ransac_confidence = DEFAULT_RANSAC_CONFIDENCE
276
+ ransac_max_iter = DEFAULT_RANSAC_MAX_ITER
277
+ input_lists = []
278
+ dist_examples = distribute_elements(pairs, example_matchers)
279
+ for pair, mt in zip(pairs, dist_examples):
280
+ input_lists.append(
281
+ [
282
+ pair[0],
283
+ pair[1],
284
+ match_setting_threshold,
285
+ match_setting_max_features,
286
+ detect_keypoints_threshold,
287
+ mt,
288
+ # enable_ransac,
289
+ ransac_method,
290
+ ransac_reproj_threshold,
291
+ ransac_confidence,
292
+ ransac_max_iter,
293
+ ]
294
+ )
295
+ return input_lists
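# Usage sketch: each row returned by gen_examples() is one Gradio example,
# ordered to match the UI inputs:
# [img0_path, img1_path, match_threshold, max_features, keypoint_threshold,
#  matcher_name, ransac_method, reproj_threshold, confidence, max_iter]
example_rows = gen_examples(ROOT / "datasets")  # may download the dataset
assert len(example_rows[0]) == 10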
296
+
297
+
298
+ def set_null_pred(feature_type: str, pred: dict):
299
+ if feature_type == "KEYPOINT":
300
+ pred["mmkeypoints0_orig"] = np.array([])
301
+ pred["mmkeypoints1_orig"] = np.array([])
302
+ pred["mmconf"] = np.array([])
303
+ elif feature_type == "LINE":
304
+ pred["mline_keypoints0_orig"] = np.array([])
305
+ pred["mline_keypoints1_orig"] = np.array([])
306
+ pred["H"] = None
307
+ pred["geom_info"] = {}
308
+ return pred
309
+
310
+
311
+ def _filter_matches_opencv(
312
+ kp0: np.ndarray,
313
+ kp1: np.ndarray,
314
+ method: int = cv2.RANSAC,
315
+ reproj_threshold: float = 3.0,
316
+ confidence: float = 0.99,
317
+ max_iter: int = 2000,
318
+ geometry_type: str = "Homography",
319
+ ) -> Tuple[np.ndarray, np.ndarray]:
320
+ """
321
+ Filters matches between two sets of keypoints using OpenCV's findHomography or findFundamentalMat, depending on geometry_type.
322
+
323
+ Args:
324
+ kp0 (np.ndarray): Array of keypoints from the first image.
325
+ kp1 (np.ndarray): Array of keypoints from the second image.
326
+ method (int, optional): OpenCV robust estimation method. Defaults to cv2.RANSAC.
327
+ reproj_threshold (float, optional): RANSAC reprojection threshold. Defaults to 3.0.
328
+ confidence (float, optional): RANSAC confidence. Defaults to 0.99.
329
+ max_iter (int, optional): RANSAC maximum iterations. Defaults to 2000.
330
+ geometry_type (str, optional): Type of geometry. Defaults to "Homography".
331
+
332
+ Returns:
333
+ Tuple[np.ndarray, np.ndarray]: estimated matrix (homography or fundamental) and boolean inlier mask.
334
+ """
335
+ if geometry_type == "Homography":
336
+ try:
337
+ M, mask = cv2.findHomography(
338
+ kp0,
339
+ kp1,
340
+ method=method,
341
+ ransacReprojThreshold=reproj_threshold,
342
+ confidence=confidence,
343
+ maxIters=max_iter,
344
+ )
345
+ except cv2.error:
346
+ logger.error("compute findHomography error, len(kp0): {}".format(len(kp0)))
347
+ return None, None
348
+ elif geometry_type == "Fundamental":
349
+ try:
350
+ M, mask = cv2.findFundamentalMat(
351
+ kp0,
352
+ kp1,
353
+ method=method,
354
+ ransacReprojThreshold=reproj_threshold,
355
+ confidence=confidence,
356
+ maxIters=max_iter,
357
+ )
358
+ except cv2.error:
359
+ logger.error(
360
+ "compute findFundamentalMat error, len(kp0): {}".format(len(kp0))
361
+ )
362
+ return None, None
363
+ mask = np.array(mask.ravel().astype("bool"), dtype="bool")
364
+ return M, mask
365
+
366
+
367
+ def _filter_matches_poselib(
368
+ kp0: np.ndarray,
369
+ kp1: np.ndarray,
370
+ method: int = None, # not used
371
+ reproj_threshold: float = 3,
372
+ confidence: float = 0.99,
373
+ max_iter: int = 2000,
374
+ geometry_type: str = "Homography",
375
+ ) -> Tuple[np.ndarray, np.ndarray]:
376
+ """
377
+ Filters matches between two sets of keypoints using the poselib library.
378
+
379
+ Args:
380
+ kp0 (np.ndarray): Array of keypoints from the first image.
381
+ kp1 (np.ndarray): Array of keypoints from the second image.
382
+ method (int, optional): unused; kept for signature compatibility with the OpenCV variant.
383
+ reproj_threshold (float, optional): RANSAC reprojection threshold. Defaults to 3.
384
+ confidence (float, optional): RANSAC confidence. Defaults to 0.99.
385
+ max_iter (int, optional): RANSAC maximum iterations. Defaults to 2000.
386
+ geometry_type (str, optional): Type of geometry. Defaults to "Homography".
387
+
388
+ Returns:
389
+ Tuple[np.ndarray, np.ndarray]: estimated model matrix and boolean inlier mask.
390
+ """
391
+ ransac_options = {
392
+ "max_iterations": max_iter,
393
+ # "min_iterations": min_iter,
394
+ "success_prob": confidence,
395
+ "max_reproj_error": reproj_threshold,
396
+ # "progressive_sampling": args.sampler.lower() == 'prosac'
397
+ }
398
+
399
+ if geometry_type == "Homography":
400
+ M, info = poselib.estimate_homography(kp0, kp1, ransac_options)
401
+ elif geometry_type == "Fundamental":
402
+ M, info = poselib.estimate_fundamental(kp0, kp1, ransac_options)
403
+ else:
404
+ raise NotImplementedError
405
+
406
+ return M, np.array(info["inliers"])
407
+
408
+
409
+ def proc_ransac_matches(
410
+ mkpts0: np.ndarray,
411
+ mkpts1: np.ndarray,
412
+ ransac_method: str = DEFAULT_RANSAC_METHOD,
413
+ ransac_reproj_threshold: float = 3.0,
414
+ ransac_confidence: float = 0.99,
415
+ ransac_max_iter: int = 2000,
416
+ geometry_type: str = "Homography",
417
+ ):
418
+ if ransac_method.startswith("CV2"):
419
+ logger.info(f"ransac_method: {ransac_method}, geometry_type: {geometry_type}")
420
+ return _filter_matches_opencv(
421
+ mkpts0,
422
+ mkpts1,
423
+ ransac_zoo[ransac_method],
424
+ ransac_reproj_threshold,
425
+ ransac_confidence,
426
+ ransac_max_iter,
427
+ geometry_type,
428
+ )
429
+ elif ransac_method.startswith("POSELIB"):
430
+ logger.info(f"ransac_method: {ransac_method}, geometry_type: {geometry_type}")
431
+ return _filter_matches_poselib(
432
+ mkpts0,
433
+ mkpts1,
434
+ None,
435
+ ransac_reproj_threshold,
436
+ ransac_confidence,
437
+ ransac_max_iter,
438
+ geometry_type,
439
+ )
440
+ else:
441
+ raise NotImplementedError
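# A minimal sketch of the backend dispatch in proc_ransac_matches(); the
# correspondences below are synthetic and exactly homography-related, so both
# the OpenCV and the poselib estimator should keep nearly all of them as
# inliers (meant to run after the module is fully loaded, since ransac_zoo is
# defined further down).
rng = np.random.default_rng(0)
pts0 = rng.uniform(0, 480, size=(40, 2))
H_true = np.array([[1.0, 0.02, 5.0], [0.01, 1.0, -3.0], [0.0, 0.0, 1.0]])
pts0_h = np.concatenate([pts0, np.ones((40, 1))], axis=1) @ H_true.T
pts1 = pts0_h[:, :2] / pts0_h[:, 2:]

H_cv, inl_cv = proc_ransac_matches(pts0, pts1, "CV2_USAC_MAGSAC", 8, 0.9999, 10000, "Homography")
H_pl, inl_pl = proc_ransac_matches(pts0, pts1, "POSELIB", 8, 0.9999, 10000, "Homography")
print(int(inl_cv.sum()), "vs", int(inl_pl.sum()), "inliers")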
442
+
443
+
444
+ def filter_matches(
445
+ pred: Dict[str, Any],
446
+ ransac_method: str = DEFAULT_RANSAC_METHOD,
447
+ ransac_reproj_threshold: float = DEFAULT_RANSAC_REPROJ_THRESHOLD,
448
+ ransac_confidence: float = DEFAULT_RANSAC_CONFIDENCE,
449
+ ransac_max_iter: int = DEFAULT_RANSAC_MAX_ITER,
450
+ ransac_estimator: str = None,
451
+ ):
452
+ """
453
+ Filter matches using RANSAC. If keypoints are available, filter by keypoints.
454
+ If lines are available, filter by lines. If both keypoints and lines are
455
+ available, filter by keypoints.
456
+
457
+ Args:
458
+ pred (Dict[str, Any]): dict of matches, including original keypoints.
459
+ ransac_method (str, optional): RANSAC method. Defaults to DEFAULT_RANSAC_METHOD.
460
+ ransac_reproj_threshold (float, optional): RANSAC reprojection threshold. Defaults to DEFAULT_RANSAC_REPROJ_THRESHOLD.
461
+ ransac_confidence (float, optional): RANSAC confidence. Defaults to DEFAULT_RANSAC_CONFIDENCE.
462
+ ransac_max_iter (int, optional): RANSAC maximum iterations. Defaults to DEFAULT_RANSAC_MAX_ITER.
463
+
464
+ Returns:
465
+ Dict[str, Any]: filtered matches.
466
+ """
467
+ mkpts0: Optional[np.ndarray] = None
468
+ mkpts1: Optional[np.ndarray] = None
469
+ feature_type: Optional[str] = None
470
+ if "mkeypoints0_orig" in pred.keys() and "mkeypoints1_orig" in pred.keys():
471
+ mkpts0 = pred["mkeypoints0_orig"]
472
+ mkpts1 = pred["mkeypoints1_orig"]
473
+ feature_type = "KEYPOINT"
474
+ elif (
475
+ "line_keypoints0_orig" in pred.keys() and "line_keypoints1_orig" in pred.keys()
476
+ ):
477
+ mkpts0 = pred["line_keypoints0_orig"]
478
+ mkpts1 = pred["line_keypoints1_orig"]
479
+ feature_type = "LINE"
480
+ else:
481
+ return set_null_pred(feature_type, pred)
482
+ if mkpts0 is None or mkpts1 is None:
483
+ return set_null_pred(feature_type, pred)
484
+ if ransac_method not in ransac_zoo.keys():
485
+ ransac_method = DEFAULT_RANSAC_METHOD
486
+
487
+ if len(mkpts0) < DEFAULT_MIN_NUM_MATCHES:
488
+ return set_null_pred(feature_type, pred)
489
+
490
+ geom_info = compute_geometry(
491
+ pred,
492
+ ransac_method=ransac_method,
493
+ ransac_reproj_threshold=ransac_reproj_threshold,
494
+ ransac_confidence=ransac_confidence,
495
+ ransac_max_iter=ransac_max_iter,
496
+ )
497
+
498
+ if "Homography" in geom_info.keys():
499
+ mask = geom_info["mask_h"]
500
+ if feature_type == "KEYPOINT":
501
+ pred["mmkeypoints0_orig"] = mkpts0[mask]
502
+ pred["mmkeypoints1_orig"] = mkpts1[mask]
503
+ pred["mmconf"] = pred["mconf"][mask]
504
+ elif feature_type == "LINE":
505
+ pred["mline_keypoints0_orig"] = mkpts0[mask]
506
+ pred["mline_keypoints1_orig"] = mkpts1[mask]
507
+ pred["H"] = np.array(geom_info["Homography"])
508
+ else:
509
+ set_null_pred(feature_type, pred)
510
+ # do not show mask
511
+ geom_info.pop("mask_h", None)
512
+ geom_info.pop("mask_f", None)
513
+ pred["geom_info"] = geom_info
514
+ return pred
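# Sketch of the dict contract filter_matches() relies on for the keypoint
# path: the "*_orig" keys hold pixel coordinates in the original images and
# "mconf" holds per-match confidences. All values are synthetic placeholders
# (pts0 / pts1 from the sketch above), not real matcher output.
pred_demo = {
    "image0_orig": np.zeros((480, 640, 3), dtype=np.uint8),
    "image1_orig": np.zeros((480, 640, 3), dtype=np.uint8),
    "mkeypoints0_orig": pts0,
    "mkeypoints1_orig": pts1,
    "mconf": np.ones(len(pts0)),
}
pred_demo = filter_matches(pred_demo, ransac_method="CV2_USAC_MAGSAC")
# RANSAC-surviving matches now live under "mmkeypoints*_orig" and the
# estimated models under pred_demo["geom_info"].
print(len(pred_demo["mmkeypoints0_orig"]), sorted(pred_demo["geom_info"].keys()))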
515
+
516
+
517
+ def compute_geometry(
518
+ pred: Dict[str, Any],
519
+ ransac_method: str = DEFAULT_RANSAC_METHOD,
520
+ ransac_reproj_threshold: float = DEFAULT_RANSAC_REPROJ_THRESHOLD,
521
+ ransac_confidence: float = DEFAULT_RANSAC_CONFIDENCE,
522
+ ransac_max_iter: int = DEFAULT_RANSAC_MAX_ITER,
523
+ ) -> Dict[str, List[float]]:
524
+ """
525
+ Compute geometric information of matches, including Fundamental matrix,
526
+ Homography matrix, and rectification matrices (if available).
527
+
528
+ Args:
529
+ pred (Dict[str, Any]): dict of matches, including original keypoints.
530
+ ransac_method (str, optional): RANSAC method. Defaults to DEFAULT_RANSAC_METHOD.
531
+ ransac_reproj_threshold (float, optional): RANSAC reprojection threshold. Defaults to DEFAULT_RANSAC_REPROJ_THRESHOLD.
532
+ ransac_confidence (float, optional): RANSAC confidence. Defaults to DEFAULT_RANSAC_CONFIDENCE.
533
+ ransac_max_iter (int, optional): RANSAC maximum iterations. Defaults to DEFAULT_RANSAC_MAX_ITER.
534
+
535
+ Returns:
536
+ Dict[str, List[float]]: geometric information in the form of a dict.
537
+ """
538
+ mkpts0: Optional[np.ndarray] = None
539
+ mkpts1: Optional[np.ndarray] = None
540
+
541
+ if "mkeypoints0_orig" in pred.keys() and "mkeypoints1_orig" in pred.keys():
542
+ mkpts0 = pred["mkeypoints0_orig"]
543
+ mkpts1 = pred["mkeypoints1_orig"]
544
+ elif (
545
+ "line_keypoints0_orig" in pred.keys() and "line_keypoints1_orig" in pred.keys()
546
+ ):
547
+ mkpts0 = pred["line_keypoints0_orig"]
548
+ mkpts1 = pred["line_keypoints1_orig"]
549
+
550
+ if mkpts0 is not None and mkpts1 is not None:
551
+ if len(mkpts0) < 2 * DEFAULT_MIN_NUM_MATCHES:
552
+ return {}
553
+ geo_info: Dict[str, List[float]] = {}
554
+
555
+ F, mask_f = proc_ransac_matches(
556
+ mkpts0,
557
+ mkpts1,
558
+ ransac_method,
559
+ ransac_reproj_threshold,
560
+ ransac_confidence,
561
+ ransac_max_iter,
562
+ geometry_type="Fundamental",
563
+ )
564
+
565
+ if F is not None:
566
+ geo_info["Fundamental"] = F.tolist()
567
+ geo_info["mask_f"] = mask_f
568
+ H, mask_h = proc_ransac_matches(
569
+ mkpts0,
570
+ mkpts1,
571
+ ransac_method,
572
+ ransac_reproj_threshold,
573
+ ransac_confidence,
574
+ ransac_max_iter,
575
+ geometry_type="Homography",
576
+ )
577
+
578
+ h0, w0, _ = pred["image0_orig"].shape
579
+ if H is not None:
580
+ geo_info["Homography"] = H.tolist()
581
+ geo_info["mask_h"] = mask_h
582
+ try:
583
+ _, H1, H2 = cv2.stereoRectifyUncalibrated(
584
+ mkpts0.reshape(-1, 2),
585
+ mkpts1.reshape(-1, 2),
586
+ F,
587
+ imgSize=(w0, h0),
588
+ )
589
+ geo_info["H1"] = H1.tolist()
590
+ geo_info["H2"] = H2.tolist()
591
+ except cv2.error as e:
592
+ logger.error(f"StereoRectifyUncalibrated failed, skip! error: {e}")
593
+ return geo_info
594
+ else:
595
+ return {}
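# Direction convention, as a small worked example: the "Homography" stored by
# compute_geometry() is estimated from mkpts0 -> mkpts1, so it maps image0
# pixels into image1; warping image1 back onto image0 (as wrap_images() does)
# therefore uses its inverse. pred_demo comes from the sketch above.
geo_demo = compute_geometry(pred_demo)
if "Homography" in geo_demo:
    H_demo = np.array(geo_demo["Homography"])
    p0 = np.array([100.0, 200.0, 1.0])   # a pixel of image0, homogeneous
    p1 = H_demo @ p0
    p1 = p1[:2] / p1[2]                  # the corresponding pixel of image1
    img1_on_img0 = cv2.warpPerspective(
        pred_demo["image1_orig"], np.linalg.inv(H_demo), (640, 480)
    )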
596
+
597
+
598
+ def wrap_images(
599
+ img0: np.ndarray,
600
+ img1: np.ndarray,
601
+ geo_info: Optional[Dict[str, List[float]]],
602
+ geom_type: str,
603
+ ) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
604
+ """
605
+ Warps the images based on the geometric transformation used to align them.
606
+
607
+ Args:
608
+ img0: numpy array representing the first image.
609
+ img1: numpy array representing the second image.
610
+ geo_info: dictionary containing the geometric transformation information.
611
+ geom_type: type of geometric transformation used to align the images.
612
+
613
+ Returns:
614
+ A tuple containing the rendered side-by-side visualization and the warped second image, or (None, None) on failure.
615
+ """
616
+ h0, w0, _ = img0.shape
617
+ h1, w1, _ = img1.shape
618
+ if geo_info is not None and len(geo_info) != 0:
619
+ rectified_image0 = img0
620
+ rectified_image1 = None
621
+ if "Homography" not in geo_info:
622
+ logger.warning(f"{geom_type} not exist, maybe too less matches")
623
+ return None, None
624
+
625
+ H = np.array(geo_info["Homography"])
626
+
627
+ title: List[str] = []
628
+ if geom_type == "Homography":
629
+ H_inv = np.linalg.inv(H)
630
+ rectified_image1 = cv2.warpPerspective(img1, H_inv, (w0, h0))
631
+ title = ["Image 0", "Image 1 - warped"]
632
+ elif geom_type == "Fundamental":
633
+ if geom_type not in geo_info:
634
+ logger.warning(f"{geom_type} not exist, maybe too less matches")
635
+ return None, None
636
+ else:
637
+ H1, H2 = np.array(geo_info["H1"]), np.array(geo_info["H2"])
638
+ rectified_image0 = cv2.warpPerspective(img0, H1, (w0, h0))
639
+ rectified_image1 = cv2.warpPerspective(img1, H2, (w1, h1))
640
+ title = ["Image 0 - warped", "Image 1 - warped"]
641
+ else:
642
+ print("Error: Unknown geometry type")
643
+ fig = plot_images(
644
+ [rectified_image0.squeeze(), rectified_image1.squeeze()],
645
+ title,
646
+ dpi=300,
647
+ )
648
+ return fig2im(fig), rectified_image1
649
+ else:
650
+ return None, None
651
+
652
+
653
+ def generate_warp_images(
654
+ input_image0: np.ndarray,
655
+ input_image1: np.ndarray,
656
+ matches_info: Dict[str, Any],
657
+ choice: str,
658
+ ) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
659
+ """
660
+ Generates the warped image visualization according to the chosen geometric transformation.
661
+
662
+ Args:
663
+ input_image0: First input image.
664
+ input_image1: Second input image.
665
+ matches_info: Dictionary containing information about the matches.
666
+ choice: Type of geometric transformation to use ('Homography' or 'Fundamental') or 'No' to disable.
667
+
668
+ Returns:
669
+ A tuple containing the visualization of the aligned image pair and the warped image.
670
+ """
671
+ if (
672
+ matches_info is None
673
+ or len(matches_info) < 1
674
+ or "geom_info" not in matches_info.keys()
675
+ ):
676
+ return None, None
677
+ geom_info = matches_info["geom_info"]
678
+ warped_image = None
679
+ if choice != "No":
680
+ wrapped_image_pair, warped_image = wrap_images(
681
+ input_image0, input_image1, geom_info, choice
682
+ )
683
+ return wrapped_image_pair, warped_image
684
+ else:
685
+ return None, None
686
+
687
+
688
+ def send_to_match(state_cache: Dict[str, Any]):
689
+ """
690
+ Send the state cache to the match function.
691
+
692
+ Args:
693
+ state_cache (Dict[str, Any]): Current state of the app.
694
+
695
+ Returns:
696
+ A tuple (image0_orig, wrapped_image) read from the cache, or (None, None) if the cache is empty.
697
+ """
698
+ if state_cache:
699
+ return (
700
+ state_cache["image0_orig"],
701
+ state_cache["wrapped_image"],
702
+ )
703
+ else:
704
+ return None, None
705
+
706
+
707
+ def run_ransac(
708
+ state_cache: Dict[str, Any],
709
+ choice_geometry_type: str,
710
+ ransac_method: str = DEFAULT_RANSAC_METHOD,
711
+ ransac_reproj_threshold: int = DEFAULT_RANSAC_REPROJ_THRESHOLD,
712
+ ransac_confidence: float = DEFAULT_RANSAC_CONFIDENCE,
713
+ ransac_max_iter: int = DEFAULT_RANSAC_MAX_ITER,
714
+ ) -> Tuple[Optional[np.ndarray], Optional[Dict[str, int]]]:
715
+ """
716
+ Run RANSAC matches and return the output images and the number of matches.
717
+
718
+ Args:
719
+ state_cache (Dict[str, Any]): Current state of the app, including the matches.
720
+ ransac_method (str, optional): RANSAC method. Defaults to DEFAULT_RANSAC_METHOD.
721
+ ransac_reproj_threshold (int, optional): RANSAC reprojection threshold. Defaults to DEFAULT_RANSAC_REPROJ_THRESHOLD.
722
+ ransac_confidence (float, optional): RANSAC confidence. Defaults to DEFAULT_RANSAC_CONFIDENCE.
723
+ ransac_max_iter (int, optional): RANSAC maximum iterations. Defaults to DEFAULT_RANSAC_MAX_ITER.
724
+
725
+ Returns:
726
+ Tuple containing the RANSAC match visualization, the raw/RANSAC match counts, the warped image visualization, and the path of the dumped state cache.
727
+ """
728
+ if not state_cache:
729
+ logger.info("Run Match first before Rerun RANSAC")
730
+ gr.Warning("Run Match first before Rerun RANSAC")
731
+ return None, None
732
+ t1 = time.time()
733
+ logger.info(
734
+ f"Run RANSAC matches using: {ransac_method} with threshold: {ransac_reproj_threshold}"
735
+ )
736
+ logger.info(
737
+ f"Run RANSAC matches using: {ransac_confidence} with iter: {ransac_max_iter}"
738
+ )
739
+ # if enable_ransac:
740
+ filter_matches(
741
+ state_cache,
742
+ ransac_method=ransac_method,
743
+ ransac_reproj_threshold=ransac_reproj_threshold,
744
+ ransac_confidence=ransac_confidence,
745
+ ransac_max_iter=ransac_max_iter,
746
+ )
747
+ logger.info(f"RANSAC matches done using: {time.time()-t1:.3f}s")
748
+ t1 = time.time()
749
+
750
+ # plot images with ransac matches
751
+ titles = [
752
+ "Image 0 - Ransac matched keypoints",
753
+ "Image 1 - Ransac matched keypoints",
754
+ ]
755
+ output_matches_ransac, num_matches_ransac = display_matches(
756
+ state_cache, titles=titles, tag="KPTS_RANSAC"
757
+ )
758
+ logger.info(f"Display matches done using: {time.time()-t1:.3f}s")
759
+ t1 = time.time()
760
+
761
+ # compute warp images
762
+ output_wrapped, warped_image = generate_warp_images(
763
+ state_cache["image0_orig"],
764
+ state_cache["image1_orig"],
765
+ state_cache,
766
+ choice_geometry_type,
767
+ )
768
+ plt.close("all")
769
+
770
+ num_matches_raw = state_cache["num_matches_raw"]
771
+ state_cache["wrapped_image"] = warped_image
772
+
773
+ # tmp_state_cache = tempfile.NamedTemporaryFile(suffix='.pkl', delete=False)
774
+ tmp_state_cache = "output.pkl"
775
+ with open(tmp_state_cache, "wb") as f:
776
+ pickle.dump(state_cache, f)
777
+
778
+ logger.info("Dump results done!")
779
+
780
+ return (
781
+ output_matches_ransac,
782
+ {
783
+ "num_matches_raw": num_matches_raw,
784
+ "num_matches_ransac": num_matches_ransac,
785
+ },
786
+ output_wrapped,
787
+ tmp_state_cache,
788
+ )
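# Sketch of re-running RANSAC offline from a previously dumped state cache;
# "output.pkl" is the file written by run_matching() / run_ransac() above.
with open("output.pkl", "rb") as f:
    cached_state = pickle.load(f)
ransac_vis, counts, warp_vis, cache_path = run_ransac(
    cached_state,
    choice_geometry_type="Homography",
    ransac_method="POSELIB",
    ransac_reproj_threshold=4,
)
print(counts)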
789
+
790
+
791
+ def generate_fake_outputs(
792
+ output_keypoints,
793
+ output_matches_raw,
794
+ output_matches_ransac,
795
+ match_conf,
796
+ extract_conf,
797
+ pred,
798
+ ):
799
+ return (
800
+ output_keypoints,
801
+ output_matches_raw,
802
+ output_matches_ransac,
803
+ {},
804
+ {
805
+ "match_conf": match_conf,
806
+ "extractor_conf": extract_conf,
807
+ },
808
+ {
809
+ "geom_info": pred.get("geom_info", {}),
810
+ },
811
+ None,
812
+ None,
813
+ None,
814
+ )
815
+
816
+
817
+ def run_matching(
818
+ image0: np.ndarray,
819
+ image1: np.ndarray,
820
+ match_threshold: float,
821
+ extract_max_keypoints: int,
822
+ keypoint_threshold: float,
823
+ key: str,
824
+ ransac_method: str = DEFAULT_RANSAC_METHOD,
825
+ ransac_reproj_threshold: int = DEFAULT_RANSAC_REPROJ_THRESHOLD,
826
+ ransac_confidence: float = DEFAULT_RANSAC_CONFIDENCE,
827
+ ransac_max_iter: int = DEFAULT_RANSAC_MAX_ITER,
828
+ choice_geometry_type: str = DEFAULT_SETTING_GEOMETRY,
829
+ matcher_zoo: Dict[str, Any] = None,
830
+ force_resize: bool = False,
831
+ image_width: int = 640,
832
+ image_height: int = 480,
833
+ use_cached_model: bool = True,
834
+ ) -> Tuple[
835
+ np.ndarray,
836
+ np.ndarray,
837
+ np.ndarray,
838
+ Dict[str, int],
839
+ Dict[str, Dict[str, Any]],
840
+ Dict[str, Dict[str, float]],
841
+ np.ndarray,
842
+ ]:
843
+ """Match two images using the given parameters.
844
+
845
+ Args:
846
+ image0 (np.ndarray): RGB image 0.
847
+ image1 (np.ndarray): RGB image 1.
848
+ match_threshold (float): match threshold.
849
+ extract_max_keypoints (int): number of keypoints to extract.
850
+ keypoint_threshold (float): keypoint threshold.
851
+ key (str): key of the model to use.
852
+ ransac_method (str, optional): RANSAC method to use.
853
+ ransac_reproj_threshold (int, optional): RANSAC reprojection threshold.
854
+ ransac_confidence (float, optional): RANSAC confidence level.
855
+ ransac_max_iter (int, optional): RANSAC maximum number of iterations.
856
+ choice_geometry_type (str, optional): setting of geometry estimation.
857
+ matcher_zoo (Dict[str, Any], optional): matcher zoo. Defaults to None.
858
+ force_resize (bool, optional): force resize. Defaults to False.
859
+ image_width (int, optional): image width. Defaults to 640.
860
+ image_height (int, optional): image height. Defaults to 480.
861
+ use_cached_model (bool, optional): use cached model. Defaults to True.
862
+
863
+ Returns:
864
+ tuple:
865
+ - output_keypoints (np.ndarray): image with keypoints.
866
+ - output_matches_raw (np.ndarray): image with raw matches.
867
+ - output_matches_ransac (np.ndarray): image with RANSAC matches.
868
+ - num_matches (Dict[str, int]): number of raw and RANSAC matches.
869
+ - configs (Dict[str, Dict[str, Any]]): match and feature extraction configs.
870
+ - geom_info (Dict[str, Dict[str, float]]): geometry information.
871
+ - output_wrapped (np.ndarray): wrapped images.
872
+ """
873
+ # image0 and image1 are in RGB mode
874
+ if image0 is None or image1 is None:
875
+ logger.error(
876
+ "Error: No images found! Please upload two images or select an example."
877
+ )
878
+ raise gr.Error(
879
+ "Error: No images found! Please upload two images or select an example."
880
+ )
881
+ # init output
882
+ output_keypoints = None
883
+ output_matches_raw = None
884
+ output_matches_ransac = None
885
+
886
+ t0 = time.time()
887
+ model = matcher_zoo[key]
888
+ match_conf = model["matcher"]
889
+ # update match config
890
+ match_conf["model"]["match_threshold"] = match_threshold
891
+ match_conf["model"]["max_keypoints"] = extract_max_keypoints
892
+ cache_key = "{}_{}".format(key, match_conf["model"]["name"])
893
+
894
+ efficiency = model["info"].get("efficiency", "high")
895
+ if efficiency == "low":
896
+ gr.Warning(
897
+ "Matcher {} is time-consuming, please wait for a while".format(
898
+ model["info"].get("name", "unknown")
899
+ )
900
+ )
901
+
902
+ if use_cached_model:
903
+ # because of the model cache, we need to update the config
904
+ matcher = model_cache.load_model(cache_key, get_model, match_conf)
905
+ matcher.conf["max_keypoints"] = extract_max_keypoints
906
+ matcher.conf["match_threshold"] = match_threshold
907
+ logger.info(f"Loaded cached model {cache_key}")
908
+ else:
909
+ matcher = get_model(match_conf)
910
+ logger.info(f"Loading model using: {time.time()-t0:.3f}s")
911
+ t1 = time.time()
912
+ yield generate_fake_outputs(
913
+ output_keypoints, output_matches_raw, output_matches_ransac, match_conf, {}, {}
914
+ )
915
+
916
+ if model["dense"]:
917
+ if not match_conf["preprocessing"].get("force_resize", False):
918
+ match_conf["preprocessing"]["force_resize"] = force_resize
919
+ else:
920
+ logger.info("preprocessing is already resized")
921
+ if force_resize:
922
+ match_conf["preprocessing"]["height"] = image_height
923
+ match_conf["preprocessing"]["width"] = image_width
924
+ logger.info(f"Force resize to {image_width}x{image_height}")
925
+
926
+ pred = match_dense.match_images(
927
+ matcher, image0, image1, match_conf["preprocessing"], device=DEVICE
928
+ )
929
+ del matcher
930
+ extract_conf = None
931
+ else:
932
+ extract_conf = model["feature"]
933
+ # update extract config
934
+ extract_conf["model"]["max_keypoints"] = extract_max_keypoints
935
+ extract_conf["model"]["keypoint_threshold"] = keypoint_threshold
936
+ cache_key = "{}_{}".format(key, extract_conf["model"]["name"])
937
+
938
+ if use_cached_model:
939
+ extractor = model_cache.load_model(
940
+ cache_key, get_feature_model, extract_conf
941
+ )
942
+ # because of the model cache, we need to update the config
943
+ extractor.conf["max_keypoints"] = extract_max_keypoints
944
+ extractor.conf["keypoint_threshold"] = keypoint_threshold
945
+ logger.info(f"Loaded cached model {cache_key}")
946
+ else:
947
+ extractor = get_feature_model(extract_conf)
948
+
949
+ if not extract_conf["preprocessing"].get("force_resize", False):
950
+ extract_conf["preprocessing"]["force_resize"] = force_resize
951
+ else:
952
+ logger.info("preprocessing is already resized")
953
+ if force_resize:
954
+ extract_conf["preprocessing"]["height"] = image_height
955
+ extract_conf["preprocessing"]["width"] = image_width
956
+ logger.info(f"Force resize to {image_width}x{image_height}")
957
+
958
+ pred0 = extract_features.extract(
959
+ extractor, image0, extract_conf["preprocessing"]
960
+ )
961
+ pred1 = extract_features.extract(
962
+ extractor, image1, extract_conf["preprocessing"]
963
+ )
964
+ pred = match_features.match_images(matcher, pred0, pred1)
965
+ del extractor
966
+ # gr.Info(
967
+ # f"Matching images done using: {time.time()-t1:.3f}s",
968
+ # )
969
+ logger.info(f"Matching images done using: {time.time()-t1:.3f}s")
970
+ t1 = time.time()
971
+
972
+ # plot images with keypoints
973
+ titles = [
974
+ "Image 0 - Keypoints",
975
+ "Image 1 - Keypoints",
976
+ ]
977
+ output_keypoints = display_keypoints(pred, titles=titles)
978
+ yield generate_fake_outputs(
979
+ output_keypoints,
980
+ output_matches_raw,
981
+ output_matches_ransac,
982
+ match_conf,
983
+ extract_conf,
984
+ pred,
985
+ )
986
+
987
+ # plot images with raw matches
988
+ titles = [
989
+ "Image 0 - Raw matched keypoints",
990
+ "Image 1 - Raw matched keypoints",
991
+ ]
992
+ output_matches_raw, num_matches_raw = display_matches(pred, titles=titles)
993
+ yield generate_fake_outputs(
994
+ output_keypoints,
995
+ output_matches_raw,
996
+ output_matches_ransac,
997
+ match_conf,
998
+ extract_conf,
999
+ pred,
1000
+ )
1001
+
1002
+ # if enable_ransac:
1003
+ filter_matches(
1004
+ pred,
1005
+ ransac_method=ransac_method,
1006
+ ransac_reproj_threshold=ransac_reproj_threshold,
1007
+ ransac_confidence=ransac_confidence,
1008
+ ransac_max_iter=ransac_max_iter,
1009
+ )
1010
+
1011
+ # gr.Info(f"RANSAC matches done using: {time.time()-t1:.3f}s")
1012
+ logger.info(f"RANSAC matches done using: {time.time()-t1:.3f}s")
1013
+ t1 = time.time()
1014
+
1015
+ # plot images with ransac matches
1016
+ titles = [
1017
+ "Image 0 - Ransac matched keypoints",
1018
+ "Image 1 - Ransac matched keypoints",
1019
+ ]
1020
+ output_matches_ransac, num_matches_ransac = display_matches(
1021
+ pred, titles=titles, tag="KPTS_RANSAC"
1022
+ )
1023
+ yield generate_fake_outputs(
1024
+ output_keypoints,
1025
+ output_matches_raw,
1026
+ output_matches_ransac,
1027
+ match_conf,
1028
+ extract_conf,
1029
+ pred,
1030
+ )
1031
+
1032
+ # gr.Info(f"Display matches done using: {time.time()-t1:.3f}s")
1033
+ logger.info(f"Display matches done using: {time.time()-t1:.3f}s")
1034
+ t1 = time.time()
1035
+ # plot wrapped images
1036
+ output_wrapped, warped_image = generate_warp_images(
1037
+ pred["image0_orig"],
1038
+ pred["image1_orig"],
1039
+ pred,
1040
+ choice_geometry_type,
1041
+ )
1042
+ plt.close("all")
1043
+ # gr.Info(f"In summary, total time: {time.time()-t0:.3f}s")
1044
+ logger.info(f"TOTAL time: {time.time()-t0:.3f}s")
1045
+
1046
+ state_cache = pred
1047
+ state_cache["num_matches_raw"] = num_matches_raw
1048
+ state_cache["num_matches_ransac"] = num_matches_ransac
1049
+ state_cache["wrapped_image"] = warped_image
1050
+
1051
+ # tmp_state_cache = tempfile.NamedTemporaryFile(suffix='.pkl', delete=False)
1052
+ tmp_state_cache = "output.pkl"
1053
+ with open(tmp_state_cache, "wb") as f:
1054
+ pickle.dump(state_cache, f)
1055
+ logger.info("Dump results done!")
1056
+
1057
+ yield (
1058
+ output_keypoints,
1059
+ output_matches_raw,
1060
+ output_matches_ransac,
1061
+ {
1062
+ "num_raw_matches": num_matches_raw,
1063
+ "num_ransac_matches": num_matches_ransac,
1064
+ },
1065
+ {
1066
+ "match_conf": match_conf,
1067
+ "extractor_conf": extract_conf,
1068
+ },
1069
+ {
1070
+ "geom_info": pred.get("geom_info", {}),
1071
+ },
1072
+ output_wrapped,
1073
+ state_cache,
1074
+ tmp_state_cache,
1075
+ )
1076
+
1077
+
1078
+ # @ref: https://docs.opencv.org/4.x/d0/d74/md__build_4_x-contrib_docs-lin64_opencv_doc_tutorials_calib3d_usac.html
1079
+ # AND: https://opencv.org/blog/2021/06/09/evaluating-opencvs-new-ransacs
1080
+ ransac_zoo = {
1081
+ "POSELIB": "LO-RANSAC",
1082
+ "CV2_RANSAC": cv2.RANSAC,
1083
+ "CV2_USAC_MAGSAC": cv2.USAC_MAGSAC,
1084
+ "CV2_USAC_DEFAULT": cv2.USAC_DEFAULT,
1085
+ "CV2_USAC_FM_8PTS": cv2.USAC_FM_8PTS,
1086
+ "CV2_USAC_PROSAC": cv2.USAC_PROSAC,
1087
+ "CV2_USAC_FAST": cv2.USAC_FAST,
1088
+ "CV2_USAC_ACCURATE": cv2.USAC_ACCURATE,
1089
+ "CV2_USAC_PARALLEL": cv2.USAC_PARALLEL,
1090
+ }
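
The `CV2_*` values above are plain OpenCV flags, so they can be handed directly to OpenCV's robust estimators, while `"POSELIB"` maps to a string and is assumed to be dispatched to a separate poselib backend inside `filter_matches`. A minimal usage sketch, where `mkpts0`/`mkpts1` are hypothetical Nx2 arrays of matched keypoints:

import cv2
import numpy as np

def verify_homography(mkpts0: np.ndarray, mkpts1: np.ndarray,
                      ransac_method: str = "CV2_USAC_MAGSAC",
                      reproj_threshold: float = 8.0):
    # Look up the OpenCV flag; reject non-OpenCV backends such as "POSELIB".
    method = ransac_zoo[ransac_method]
    if not isinstance(method, int):
        raise ValueError(f"{ransac_method} is not an OpenCV RANSAC variant")
    H, mask = cv2.findHomography(mkpts0, mkpts1, method, reproj_threshold)
    inliers = mask.ravel().astype(bool) if mask is not None else None
    return H, inliers  # estimated homography and per-match inlier mask
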
1091
+
1092
+
1093
+ def rotate_image(input_path, degrees, output_path):
1094
+ img = Image.open(input_path)
1095
+ img_rotated = img.rotate(-degrees)
1096
+ img_rotated.save(output_path)
1097
+
1098
+
1099
+ def scale_image(input_path, scale_factor, output_path):
1100
+ img = Image.open(input_path)
1101
+ width, height = img.size
1102
+ new_width = int(width * scale_factor)
1103
+ new_height = int(height * scale_factor)
1104
+ new_img = Image.new("RGB", (width, height), (0, 0, 0))
1105
+ img_resized = img.resize((new_width, new_height))
1106
+ position = ((width - new_width) // 2, (height - new_height) // 2)
1107
+ new_img.paste(img_resized, position)
1108
+ new_img.save(output_path)
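
Both helpers above read an image from disk and write the transformed copy back out; a minimal, hedged usage example with hypothetical file names:

rotate_image("query.jpg", 90, "query_rot90.jpg")   # rotated 90 degrees clockwise (PIL rotates CCW, hence the negated angle above)
scale_image("query.jpg", 0.5, "query_half.jpg")    # content resized to 50% and centered on a black canvas of the original size
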
 
imcui/ui/viz.py CHANGED
@@ -1,481 +1,481 @@
1
- import typing
2
- from pathlib import Path
3
- from typing import Dict, List, Optional, Tuple, Union
4
-
5
- import cv2
6
- import matplotlib
7
- import matplotlib.pyplot as plt
8
- import numpy as np
9
- import seaborn as sns
10
-
11
- from ..hloc.utils.viz import add_text, plot_keypoints
12
-
13
- np.random.seed(1995)
14
- color_map = np.arange(100)
15
- np.random.shuffle(color_map)
16
-
17
-
18
- def plot_images(
19
- imgs: List[np.ndarray],
20
- titles: Optional[List[str]] = None,
21
- cmaps: Union[str, List[str]] = "gray",
22
- dpi: int = 100,
23
- size: Optional[int] = 5,
24
- pad: float = 0.5,
25
- ) -> plt.Figure:
26
- """Plot a set of images horizontally.
27
- Args:
28
- imgs: a list of NumPy or PyTorch images, RGB (H, W, 3) or mono (H, W).
29
- titles: a list of strings, as titles for each image.
30
- cmaps: colormaps for monochrome images. If a single string is given,
31
- it is used for all images.
32
- dpi: DPI of the figure.
33
- size: figure size in inches (width). If not provided, the figure
34
- size is determined automatically.
35
- pad: padding between subplots, in inches.
36
- Returns:
37
- The created figure.
38
- """
39
- n = len(imgs)
40
- if not isinstance(cmaps, list):
41
- cmaps = [cmaps] * n
42
- figsize = (size * n, size * 6 / 5) if size is not None else None
43
- fig, ax = plt.subplots(1, n, figsize=figsize, dpi=dpi)
44
-
45
- if n == 1:
46
- ax = [ax]
47
- for i in range(n):
48
- ax[i].imshow(imgs[i], cmap=plt.get_cmap(cmaps[i]))
49
- ax[i].get_yaxis().set_ticks([])
50
- ax[i].get_xaxis().set_ticks([])
51
- ax[i].set_axis_off()
52
- for spine in ax[i].spines.values(): # remove frame
53
- spine.set_visible(False)
54
- if titles:
55
- ax[i].set_title(titles[i])
56
- fig.tight_layout(pad=pad)
57
- return fig
58
-
59
-
60
- def plot_color_line_matches(
61
- lines: List[np.ndarray],
62
- correct_matches: Optional[np.ndarray] = None,
63
- lw: float = 2.0,
64
- indices: Tuple[int, int] = (0, 1),
65
- ) -> matplotlib.figure.Figure:
66
- """Plot line matches for existing images with multiple colors.
67
-
68
- Args:
69
- lines: List of ndarrays of size (N, 2, 2) representing line segments.
70
- correct_matches: Optional bool array of size (N,) indicating correct
71
- matches. If not None, display wrong matches with a low alpha.
72
- lw: Line width as float pixels.
73
- indices: Indices of the images to draw the matches on.
74
-
75
- Returns:
76
- The modified matplotlib figure.
77
- """
78
- n_lines = lines[0].shape[0]
79
- colors = sns.color_palette("husl", n_colors=n_lines)
80
- np.random.shuffle(colors)
81
- alphas = np.ones(n_lines)
82
- if correct_matches is not None:
83
- alphas[~np.array(correct_matches)] = 0.2
84
-
85
- fig = plt.gcf()
86
- ax = typing.cast(List[matplotlib.axes.Axes], fig.axes)
87
- assert len(ax) > max(indices)
88
- axes = [ax[i] for i in indices]
89
- fig.canvas.draw()
90
-
91
- # Plot the lines
92
- for a, l in zip(axes, lines): # noqa: E741
93
- # Transform the points into the figure coordinate system
94
- transFigure = fig.transFigure.inverted()
95
- endpoint0 = transFigure.transform(a.transData.transform(l[:, 0]))
96
- endpoint1 = transFigure.transform(a.transData.transform(l[:, 1]))
97
- fig.lines += [
98
- matplotlib.lines.Line2D(
99
- (endpoint0[i, 0], endpoint1[i, 0]),
100
- (endpoint0[i, 1], endpoint1[i, 1]),
101
- zorder=1,
102
- transform=fig.transFigure,
103
- c=colors[i],
104
- alpha=alphas[i],
105
- linewidth=lw,
106
- )
107
- for i in range(n_lines)
108
- ]
109
-
110
- return fig
111
-
112
-
113
- def make_matching_figure(
114
- img0: np.ndarray,
115
- img1: np.ndarray,
116
- mkpts0: np.ndarray,
117
- mkpts1: np.ndarray,
118
- color: np.ndarray,
119
- titles: Optional[List[str]] = None,
120
- kpts0: Optional[np.ndarray] = None,
121
- kpts1: Optional[np.ndarray] = None,
122
- text: List[str] = [],
123
- dpi: int = 75,
124
- path: Optional[Path] = None,
125
- pad: float = 0.0,
126
- ) -> Optional[plt.Figure]:
127
- """Draw image pair with matches.
128
-
129
- Args:
130
- img0: image0 as HxWx3 numpy array.
131
- img1: image1 as HxWx3 numpy array.
132
- mkpts0: matched points in image0 as Nx2 numpy array.
133
- mkpts1: matched points in image1 as Nx2 numpy array.
134
- color: colors for the matches as Nx4 numpy array.
135
- titles: titles for the two subplots.
136
- kpts0: keypoints in image0 as Kx2 numpy array.
137
- kpts1: keypoints in image1 as Kx2 numpy array.
138
- text: list of strings to display in the top-left corner of the image.
139
- dpi: dots per inch of the saved figure.
140
- path: if not None, save the figure to this path.
141
- pad: padding around the image as a fraction of the image size.
142
-
143
- Returns:
144
- The matplotlib Figure object if path is None.
145
- """
146
- # draw image pair
147
- fig, axes = plt.subplots(1, 2, figsize=(10, 6), dpi=dpi)
148
- axes[0].imshow(img0) # , cmap='gray')
149
- axes[1].imshow(img1) # , cmap='gray')
150
- for i in range(2): # clear all frames
151
- axes[i].get_yaxis().set_ticks([])
152
- axes[i].get_xaxis().set_ticks([])
153
- for spine in axes[i].spines.values():
154
- spine.set_visible(False)
155
- if titles is not None:
156
- axes[i].set_title(titles[i])
157
-
158
- plt.tight_layout(pad=pad)
159
-
160
- if kpts0 is not None:
161
- assert kpts1 is not None
162
- axes[0].scatter(kpts0[:, 0], kpts0[:, 1], c="w", s=5)
163
- axes[1].scatter(kpts1[:, 0], kpts1[:, 1], c="w", s=5)
164
-
165
- # draw matches
166
- if mkpts0.shape[0] != 0 and mkpts1.shape[0] != 0 and mkpts0.shape == mkpts1.shape:
167
- fig.canvas.draw()
168
- transFigure = fig.transFigure.inverted()
169
- fkpts0 = transFigure.transform(axes[0].transData.transform(mkpts0))
170
- fkpts1 = transFigure.transform(axes[1].transData.transform(mkpts1))
171
- fig.lines = [
172
- matplotlib.lines.Line2D(
173
- (fkpts0[i, 0], fkpts1[i, 0]),
174
- (fkpts0[i, 1], fkpts1[i, 1]),
175
- transform=fig.transFigure,
176
- c=color[i],
177
- linewidth=2,
178
- )
179
- for i in range(len(mkpts0))
180
- ]
181
-
182
- # freeze the axes to prevent the transform to change
183
- axes[0].autoscale(enable=False)
184
- axes[1].autoscale(enable=False)
185
-
186
- axes[0].scatter(mkpts0[:, 0], mkpts0[:, 1], c=color[..., :3], s=4)
187
- axes[1].scatter(mkpts1[:, 0], mkpts1[:, 1], c=color[..., :3], s=4)
188
-
189
- # put txts
190
- txt_color = "k" if img0[:100, :200].mean() > 200 else "w"
191
- fig.text(
192
- 0.01,
193
- 0.99,
194
- "\n".join(text),
195
- transform=fig.axes[0].transAxes,
196
- fontsize=15,
197
- va="top",
198
- ha="left",
199
- color=txt_color,
200
- )
201
-
202
- # save or return figure
203
- if path:
204
- plt.savefig(str(path), bbox_inches="tight", pad_inches=0)
205
- plt.close()
206
- else:
207
- return fig
208
-
209
-
210
- def error_colormap(err: np.ndarray, thr: float, alpha: float = 1.0) -> np.ndarray:
211
- """
212
- Create a colormap based on the error values.
213
-
214
- Args:
215
- err: Error values as a numpy array of shape (N,).
216
- thr: Threshold value for the error.
217
- alpha: Alpha value for the colormap, between 0 and 1.
218
-
219
- Returns:
220
- Colormap as a numpy array of shape (N, 4) with values in [0, 1].
221
- """
222
- assert alpha <= 1.0 and alpha > 0, f"Invaid alpha value: {alpha}"
223
- x = 1 - np.clip(err / (thr * 2), 0, 1)
224
- return np.clip(
225
- np.stack([2 - x * 2, x * 2, np.zeros_like(x), np.ones_like(x) * alpha], -1),
226
- 0,
227
- 1,
228
- )
229
-
230
-
231
- def fig2im(fig: matplotlib.figure.Figure) -> np.ndarray:
232
- """
233
- Convert a matplotlib figure to a numpy array with RGB values.
234
-
235
- Args:
236
- fig: A matplotlib figure.
237
-
238
- Returns:
239
- A numpy array with shape (height, width, 3) and dtype uint8 containing
240
- the RGB values of the figure.
241
- """
242
- fig.canvas.draw()
243
- (width, height) = fig.canvas.get_width_height()
244
- buf_ndarray = np.frombuffer(fig.canvas.tostring_rgb(), dtype="u1")
245
- return buf_ndarray.reshape(height, width, 3)
246
-
247
-
248
- def draw_matches_core(
249
- mkpts0: List[np.ndarray],
250
- mkpts1: List[np.ndarray],
251
- img0: np.ndarray,
252
- img1: np.ndarray,
253
- conf: np.ndarray,
254
- titles: Optional[List[str]] = None,
255
- texts: Optional[List[str]] = None,
256
- dpi: int = 150,
257
- path: Optional[str] = None,
258
- pad: float = 0.5,
259
- ) -> np.ndarray:
260
- """
261
- Draw matches between two images.
262
-
263
- Args:
264
- mkpts0: List of matches from the first image, with shape (N, 2)
265
- mkpts1: List of matches from the second image, with shape (N, 2)
266
- img0: First image, with shape (H, W, 3)
267
- img1: Second image, with shape (H, W, 3)
268
- conf: Confidence values for the matches, with shape (N,)
269
- titles: Optional list of title strings for the plot
270
- dpi: DPI for the saved image
271
- path: Optional path to save the image to. If None, the image is not saved.
272
- pad: Padding between subplots
273
-
274
- Returns:
275
- The figure as a numpy array with shape (height, width, 3) and dtype uint8
276
- containing the RGB values of the figure.
277
- """
278
- thr = 0.5
279
- color = error_colormap(1 - conf, thr, alpha=0.1)
280
- text = [
281
- # "image name",
282
- f"#Matches: {len(mkpts0)}",
283
- ]
284
- if path:
285
- fig2im(
286
- make_matching_figure(
287
- img0,
288
- img1,
289
- mkpts0,
290
- mkpts1,
291
- color,
292
- titles=titles,
293
- text=text,
294
- path=path,
295
- dpi=dpi,
296
- pad=pad,
297
- )
298
- )
299
- else:
300
- return fig2im(
301
- make_matching_figure(
302
- img0,
303
- img1,
304
- mkpts0,
305
- mkpts1,
306
- color,
307
- titles=titles,
308
- text=text,
309
- pad=pad,
310
- dpi=dpi,
311
- )
312
- )
313
-
314
-
315
- def draw_image_pairs(
316
- img0: np.ndarray,
317
- img1: np.ndarray,
318
- text: List[str] = [],
319
- dpi: int = 75,
320
- path: Optional[str] = None,
321
- pad: float = 0.5,
322
- ) -> np.ndarray:
323
- """Draw image pair horizontally.
324
-
325
- Args:
326
- img0: First image, with shape (H, W, 3)
327
- img1: Second image, with shape (H, W, 3)
328
- text: List of strings to print. Each string is a new line.
329
- dpi: DPI of the figure.
330
- path: Path to save the image to. If None, the image is not saved and
331
- the function returns the figure as a numpy array with shape
332
- (height, width, 3) and dtype uint8 containing the RGB values of the
333
- figure.
334
- pad: Padding between subplots
335
-
336
- Returns:
337
- The figure as a numpy array with shape (height, width, 3) and dtype uint8
338
- containing the RGB values of the figure, or None if path is not None.
339
- """
340
- # draw image pair
341
- fig, axes = plt.subplots(1, 2, figsize=(10, 6), dpi=dpi)
342
- axes[0].imshow(img0) # , cmap='gray')
343
- axes[1].imshow(img1) # , cmap='gray')
344
- for i in range(2): # clear all frames
345
- axes[i].get_yaxis().set_ticks([])
346
- axes[i].get_xaxis().set_ticks([])
347
- for spine in axes[i].spines.values():
348
- spine.set_visible(False)
349
- plt.tight_layout(pad=pad)
350
-
351
- # put txts
352
- txt_color = "k" if img0[:100, :200].mean() > 200 else "w"
353
- fig.text(
354
- 0.01,
355
- 0.99,
356
- "\n".join(text),
357
- transform=fig.axes[0].transAxes,
358
- fontsize=15,
359
- va="top",
360
- ha="left",
361
- color=txt_color,
362
- )
363
-
364
- # save or return figure
365
- if path:
366
- plt.savefig(str(path), bbox_inches="tight", pad_inches=0)
367
- plt.close()
368
- else:
369
- return fig2im(fig)
370
-
371
-
372
- def display_keypoints(pred: dict, titles: List[str] = []):
373
- img0 = pred["image0_orig"]
374
- img1 = pred["image1_orig"]
375
- output_keypoints = plot_images([img0, img1], titles=titles, dpi=300)
376
- if "keypoints0_orig" in pred.keys() and "keypoints1_orig" in pred.keys():
377
- plot_keypoints([pred["keypoints0_orig"], pred["keypoints1_orig"]])
378
- text = (
379
- f"# keypoints0: {len(pred['keypoints0_orig'])} \n"
380
- + f"# keypoints1: {len(pred['keypoints1_orig'])}"
381
- )
382
- add_text(0, text, fs=15)
383
- output_keypoints = fig2im(output_keypoints)
384
- return output_keypoints
385
-
386
-
387
- def display_matches(
388
- pred: Dict[str, np.ndarray],
389
- titles: List[str] = [],
390
- texts: List[str] = [],
391
- dpi: int = 300,
392
- tag: str = "KPTS_RAW", # KPTS_RAW, KPTS_RANSAC, LINES_RAW, LINES_RANSAC,
393
- ) -> Tuple[np.ndarray, int]:
394
- """
395
- Displays the matches between two images.
396
-
397
- Args:
398
- pred: Dictionary containing the original images and the matches.
399
- titles: Optional titles for the plot.
400
- dpi: Resolution of the plot.
401
-
402
- Returns:
403
- The resulting concatenated plot and the number of inliers.
404
- """
405
- img0 = pred["image0_orig"]
406
- img1 = pred["image1_orig"]
407
- num_inliers = 0
408
- KPTS0_KEY = None
409
- KPTS1_KEY = None
410
- confid = None
411
- if tag == "KPTS_RAW":
412
- KPTS0_KEY = "mkeypoints0_orig"
413
- KPTS1_KEY = "mkeypoints1_orig"
414
- if "mconf" in pred:
415
- confid = pred["mconf"]
416
- elif tag == "KPTS_RANSAC":
417
- KPTS0_KEY = "mmkeypoints0_orig"
418
- KPTS1_KEY = "mmkeypoints1_orig"
419
- if "mmconf" in pred:
420
- confid = pred["mmconf"]
421
- else:
422
- # TODO: LINES_RAW, LINES_RANSAC
423
- raise ValueError(f"Unknown tag: {tag}")
424
- # draw raw matches
425
- if (
426
- KPTS0_KEY in pred
427
- and KPTS1_KEY in pred
428
- and pred[KPTS0_KEY] is not None
429
- and pred[KPTS1_KEY] is not None
430
- ): # draw ransac matches
431
- mkpts0 = pred[KPTS0_KEY]
432
- mkpts1 = pred[KPTS1_KEY]
433
- num_inliers = len(mkpts0)
434
- if confid is None:
435
- confid = np.ones(len(mkpts0))
436
- fig_mkpts = draw_matches_core(
437
- mkpts0,
438
- mkpts1,
439
- img0,
440
- img1,
441
- confid,
442
- dpi=dpi,
443
- titles=titles,
444
- texts=texts,
445
- )
446
- fig = fig_mkpts
447
- elif (
448
- "line0_orig" in pred
449
- and "line1_orig" in pred
450
- and pred["line0_orig"] is not None
451
- and pred["line1_orig"] is not None
452
- # and (tag == "LINES_RAW" or tag == "LINES_RANSAC")
453
- ):
454
- # lines
455
- mtlines0 = pred["line0_orig"]
456
- mtlines1 = pred["line1_orig"]
457
- num_inliers = len(mtlines0)
458
- fig_lines = plot_images(
459
- [img0.squeeze(), img1.squeeze()],
460
- ["Image 0 - matched lines", "Image 1 - matched lines"],
461
- dpi=300,
462
- )
463
- fig_lines = plot_color_line_matches([mtlines0, mtlines1], lw=2)
464
- fig_lines = fig2im(fig_lines)
465
-
466
- # keypoints
467
- mkpts0 = pred.get("line_keypoints0_orig")
468
- mkpts1 = pred.get("line_keypoints1_orig")
469
- fig = None
470
- if mkpts0 is not None and mkpts1 is not None:
471
- num_inliers = len(mkpts0)
472
- if "mconf" in pred:
473
- mconf = pred["mconf"]
474
- else:
475
- mconf = np.ones(len(mkpts0))
476
- fig_mkpts = draw_matches_core(mkpts0, mkpts1, img0, img1, mconf, dpi=300)
477
- fig_lines = cv2.resize(fig_lines, (fig_mkpts.shape[1], fig_mkpts.shape[0]))
478
- fig = np.concatenate([fig_mkpts, fig_lines], axis=0)
479
- else:
480
- fig = fig_lines
481
- return fig, num_inliers
 
1
+ import typing
2
+ from pathlib import Path
3
+ from typing import Dict, List, Optional, Tuple, Union
4
+
5
+ import cv2
6
+ import matplotlib
7
+ import matplotlib.pyplot as plt
8
+ import numpy as np
9
+ import seaborn as sns
10
+
11
+ from ..hloc.utils.viz import add_text, plot_keypoints
12
+
13
+ np.random.seed(1995)
14
+ color_map = np.arange(100)
15
+ np.random.shuffle(color_map)
16
+
17
+
18
+ def plot_images(
19
+ imgs: List[np.ndarray],
20
+ titles: Optional[List[str]] = None,
21
+ cmaps: Union[str, List[str]] = "gray",
22
+ dpi: int = 100,
23
+ size: Optional[int] = 5,
24
+ pad: float = 0.5,
25
+ ) -> plt.Figure:
26
+ """Plot a set of images horizontally.
27
+ Args:
28
+ imgs: a list of NumPy or PyTorch images, RGB (H, W, 3) or mono (H, W).
29
+ titles: a list of strings, as titles for each image.
30
+ cmaps: colormaps for monochrome images. If a single string is given,
31
+ it is used for all images.
32
+ dpi: DPI of the figure.
33
+ size: figure size in inches (width). If not provided, the figure
34
+ size is determined automatically.
35
+ pad: padding between subplots, in inches.
36
+ Returns:
37
+ The created figure.
38
+ """
39
+ n = len(imgs)
40
+ if not isinstance(cmaps, list):
41
+ cmaps = [cmaps] * n
42
+ figsize = (size * n, size * 6 / 5) if size is not None else None
43
+ fig, ax = plt.subplots(1, n, figsize=figsize, dpi=dpi)
44
+
45
+ if n == 1:
46
+ ax = [ax]
47
+ for i in range(n):
48
+ ax[i].imshow(imgs[i], cmap=plt.get_cmap(cmaps[i]))
49
+ ax[i].get_yaxis().set_ticks([])
50
+ ax[i].get_xaxis().set_ticks([])
51
+ ax[i].set_axis_off()
52
+ for spine in ax[i].spines.values(): # remove frame
53
+ spine.set_visible(False)
54
+ if titles:
55
+ ax[i].set_title(titles[i])
56
+ fig.tight_layout(pad=pad)
57
+ return fig
58
+
59
+
60
+ def plot_color_line_matches(
61
+ lines: List[np.ndarray],
62
+ correct_matches: Optional[np.ndarray] = None,
63
+ lw: float = 2.0,
64
+ indices: Tuple[int, int] = (0, 1),
65
+ ) -> matplotlib.figure.Figure:
66
+ """Plot line matches for existing images with multiple colors.
67
+
68
+ Args:
69
+ lines: List of ndarrays of size (N, 2, 2) representing line segments.
70
+ correct_matches: Optional bool array of size (N,) indicating correct
71
+ matches. If not None, display wrong matches with a low alpha.
72
+ lw: Line width as float pixels.
73
+ indices: Indices of the images to draw the matches on.
74
+
75
+ Returns:
76
+ The modified matplotlib figure.
77
+ """
78
+ n_lines = lines[0].shape[0]
79
+ colors = sns.color_palette("husl", n_colors=n_lines)
80
+ np.random.shuffle(colors)
81
+ alphas = np.ones(n_lines)
82
+ if correct_matches is not None:
83
+ alphas[~np.array(correct_matches)] = 0.2
84
+
85
+ fig = plt.gcf()
86
+ ax = typing.cast(List[matplotlib.axes.Axes], fig.axes)
87
+ assert len(ax) > max(indices)
88
+ axes = [ax[i] for i in indices]
89
+ fig.canvas.draw()
90
+
91
+ # Plot the lines
92
+ for a, l in zip(axes, lines): # noqa: E741
93
+ # Transform the points into the figure coordinate system
94
+ transFigure = fig.transFigure.inverted()
95
+ endpoint0 = transFigure.transform(a.transData.transform(l[:, 0]))
96
+ endpoint1 = transFigure.transform(a.transData.transform(l[:, 1]))
97
+ fig.lines += [
98
+ matplotlib.lines.Line2D(
99
+ (endpoint0[i, 0], endpoint1[i, 0]),
100
+ (endpoint0[i, 1], endpoint1[i, 1]),
101
+ zorder=1,
102
+ transform=fig.transFigure,
103
+ c=colors[i],
104
+ alpha=alphas[i],
105
+ linewidth=lw,
106
+ )
107
+ for i in range(n_lines)
108
+ ]
109
+
110
+ return fig
111
+
112
+
113
+ def make_matching_figure(
114
+ img0: np.ndarray,
115
+ img1: np.ndarray,
116
+ mkpts0: np.ndarray,
117
+ mkpts1: np.ndarray,
118
+ color: np.ndarray,
119
+ titles: Optional[List[str]] = None,
120
+ kpts0: Optional[np.ndarray] = None,
121
+ kpts1: Optional[np.ndarray] = None,
122
+ text: List[str] = [],
123
+ dpi: int = 75,
124
+ path: Optional[Path] = None,
125
+ pad: float = 0.0,
126
+ ) -> Optional[plt.Figure]:
127
+ """Draw image pair with matches.
128
+
129
+ Args:
130
+ img0: image0 as HxWx3 numpy array.
131
+ img1: image1 as HxWx3 numpy array.
132
+ mkpts0: matched points in image0 as Nx2 numpy array.
133
+ mkpts1: matched points in image1 as Nx2 numpy array.
134
+ color: colors for the matches as Nx4 numpy array.
135
+ titles: titles for the two subplots.
136
+ kpts0: keypoints in image0 as Kx2 numpy array.
137
+ kpts1: keypoints in image1 as Kx2 numpy array.
138
+ text: list of strings to display in the top-left corner of the image.
139
+ dpi: dots per inch of the saved figure.
140
+ path: if not None, save the figure to this path.
141
+ pad: padding around the image as a fraction of the image size.
142
+
143
+ Returns:
144
+ The matplotlib Figure object if path is None.
145
+ """
146
+ # draw image pair
147
+ fig, axes = plt.subplots(1, 2, figsize=(10, 6), dpi=dpi)
148
+ axes[0].imshow(img0) # , cmap='gray')
149
+ axes[1].imshow(img1) # , cmap='gray')
150
+ for i in range(2): # clear all frames
151
+ axes[i].get_yaxis().set_ticks([])
152
+ axes[i].get_xaxis().set_ticks([])
153
+ for spine in axes[i].spines.values():
154
+ spine.set_visible(False)
155
+ if titles is not None:
156
+ axes[i].set_title(titles[i])
157
+
158
+ plt.tight_layout(pad=pad)
159
+
160
+ if kpts0 is not None:
161
+ assert kpts1 is not None
162
+ axes[0].scatter(kpts0[:, 0], kpts0[:, 1], c="w", s=5)
163
+ axes[1].scatter(kpts1[:, 0], kpts1[:, 1], c="w", s=5)
164
+
165
+ # draw matches
166
+ if mkpts0.shape[0] != 0 and mkpts1.shape[0] != 0 and mkpts0.shape == mkpts1.shape:
167
+ fig.canvas.draw()
168
+ transFigure = fig.transFigure.inverted()
169
+ fkpts0 = transFigure.transform(axes[0].transData.transform(mkpts0))
170
+ fkpts1 = transFigure.transform(axes[1].transData.transform(mkpts1))
171
+ fig.lines = [
172
+ matplotlib.lines.Line2D(
173
+ (fkpts0[i, 0], fkpts1[i, 0]),
174
+ (fkpts0[i, 1], fkpts1[i, 1]),
175
+ transform=fig.transFigure,
176
+ c=color[i],
177
+ linewidth=2,
178
+ )
179
+ for i in range(len(mkpts0))
180
+ ]
181
+
182
+ # freeze the axes to prevent the transform from changing
183
+ axes[0].autoscale(enable=False)
184
+ axes[1].autoscale(enable=False)
185
+
186
+ axes[0].scatter(mkpts0[:, 0], mkpts0[:, 1], c=color[..., :3], s=4)
187
+ axes[1].scatter(mkpts1[:, 0], mkpts1[:, 1], c=color[..., :3], s=4)
188
+
189
+ # put txts
190
+ txt_color = "k" if img0[:100, :200].mean() > 200 else "w"
191
+ fig.text(
192
+ 0.01,
193
+ 0.99,
194
+ "\n".join(text),
195
+ transform=fig.axes[0].transAxes,
196
+ fontsize=15,
197
+ va="top",
198
+ ha="left",
199
+ color=txt_color,
200
+ )
201
+
202
+ # save or return figure
203
+ if path:
204
+ plt.savefig(str(path), bbox_inches="tight", pad_inches=0)
205
+ plt.close()
206
+ else:
207
+ return fig
208
+
209
+
210
+ def error_colormap(err: np.ndarray, thr: float, alpha: float = 1.0) -> np.ndarray:
211
+ """
212
+ Create a colormap based on the error values.
213
+
214
+ Args:
215
+ err: Error values as a numpy array of shape (N,).
216
+ thr: Threshold value for the error.
217
+ alpha: Alpha value for the colormap, between 0 and 1.
218
+
219
+ Returns:
220
+ Colormap as a numpy array of shape (N, 4) with values in [0, 1].
221
+ """
222
+ assert alpha <= 1.0 and alpha > 0, f"Invalid alpha value: {alpha}"
223
+ x = 1 - np.clip(err / (thr * 2), 0, 1)
224
+ return np.clip(
225
+ np.stack([2 - x * 2, x * 2, np.zeros_like(x), np.ones_like(x) * alpha], -1),
226
+ 0,
227
+ 1,
228
+ )
229
+
230
+
231
+ def fig2im(fig: matplotlib.figure.Figure) -> np.ndarray:
232
+ """
233
+ Convert a matplotlib figure to a numpy array with RGB values.
234
+
235
+ Args:
236
+ fig: A matplotlib figure.
237
+
238
+ Returns:
239
+ A numpy array with shape (height, width, 3) and dtype uint8 containing
240
+ the RGB values of the figure.
241
+ """
242
+ fig.canvas.draw()
243
+ (width, height) = fig.canvas.get_width_height()
244
+ buf_ndarray = np.frombuffer(fig.canvas.tostring_rgb(), dtype="u1")
245
+ return buf_ndarray.reshape(height, width, 3)
246
+
247
+
248
+ def draw_matches_core(
249
+ mkpts0: List[np.ndarray],
250
+ mkpts1: List[np.ndarray],
251
+ img0: np.ndarray,
252
+ img1: np.ndarray,
253
+ conf: np.ndarray,
254
+ titles: Optional[List[str]] = None,
255
+ texts: Optional[List[str]] = None,
256
+ dpi: int = 150,
257
+ path: Optional[str] = None,
258
+ pad: float = 0.5,
259
+ ) -> np.ndarray:
260
+ """
261
+ Draw matches between two images.
262
+
263
+ Args:
264
+ mkpts0: List of matches from the first image, with shape (N, 2)
265
+ mkpts1: List of matches from the second image, with shape (N, 2)
266
+ img0: First image, with shape (H, W, 3)
267
+ img1: Second image, with shape (H, W, 3)
268
+ conf: Confidence values for the matches, with shape (N,)
269
+ titles: Optional list of title strings for the plot
270
+ dpi: DPI for the saved image
271
+ path: Optional path to save the image to. If None, the image is not saved.
272
+ pad: Padding between subplots
273
+
274
+ Returns:
275
+ The figure as a numpy array with shape (height, width, 3) and dtype uint8
276
+ containing the RGB values of the figure.
277
+ """
278
+ thr = 0.5
279
+ color = error_colormap(1 - conf, thr, alpha=0.1)
280
+ text = [
281
+ # "image name",
282
+ f"#Matches: {len(mkpts0)}",
283
+ ]
284
+ if path:
285
+ fig2im(
286
+ make_matching_figure(
287
+ img0,
288
+ img1,
289
+ mkpts0,
290
+ mkpts1,
291
+ color,
292
+ titles=titles,
293
+ text=text,
294
+ path=path,
295
+ dpi=dpi,
296
+ pad=pad,
297
+ )
298
+ )
299
+ else:
300
+ return fig2im(
301
+ make_matching_figure(
302
+ img0,
303
+ img1,
304
+ mkpts0,
305
+ mkpts1,
306
+ color,
307
+ titles=titles,
308
+ text=text,
309
+ pad=pad,
310
+ dpi=dpi,
311
+ )
312
+ )
313
+
314
+
315
+ def draw_image_pairs(
316
+ img0: np.ndarray,
317
+ img1: np.ndarray,
318
+ text: List[str] = [],
319
+ dpi: int = 75,
320
+ path: Optional[str] = None,
321
+ pad: float = 0.5,
322
+ ) -> np.ndarray:
323
+ """Draw image pair horizontally.
324
+
325
+ Args:
326
+ img0: First image, with shape (H, W, 3)
327
+ img1: Second image, with shape (H, W, 3)
328
+ text: List of strings to print. Each string is a new line.
329
+ dpi: DPI of the figure.
330
+ path: Path to save the image to. If None, the image is not saved and
331
+ the function returns the figure as a numpy array with shape
332
+ (height, width, 3) and dtype uint8 containing the RGB values of the
333
+ figure.
334
+ pad: Padding between subplots
335
+
336
+ Returns:
337
+ The figure as a numpy array with shape (height, width, 3) and dtype uint8
338
+ containing the RGB values of the figure, or None if path is not None.
339
+ """
340
+ # draw image pair
341
+ fig, axes = plt.subplots(1, 2, figsize=(10, 6), dpi=dpi)
342
+ axes[0].imshow(img0) # , cmap='gray')
343
+ axes[1].imshow(img1) # , cmap='gray')
344
+ for i in range(2): # clear all frames
345
+ axes[i].get_yaxis().set_ticks([])
346
+ axes[i].get_xaxis().set_ticks([])
347
+ for spine in axes[i].spines.values():
348
+ spine.set_visible(False)
349
+ plt.tight_layout(pad=pad)
350
+
351
+ # put txts
352
+ txt_color = "k" if img0[:100, :200].mean() > 200 else "w"
353
+ fig.text(
354
+ 0.01,
355
+ 0.99,
356
+ "\n".join(text),
357
+ transform=fig.axes[0].transAxes,
358
+ fontsize=15,
359
+ va="top",
360
+ ha="left",
361
+ color=txt_color,
362
+ )
363
+
364
+ # save or return figure
365
+ if path:
366
+ plt.savefig(str(path), bbox_inches="tight", pad_inches=0)
367
+ plt.close()
368
+ else:
369
+ return fig2im(fig)
370
+
371
+
372
+ def display_keypoints(pred: dict, titles: List[str] = []):
373
+ img0 = pred["image0_orig"]
374
+ img1 = pred["image1_orig"]
375
+ output_keypoints = plot_images([img0, img1], titles=titles, dpi=300)
376
+ if "keypoints0_orig" in pred.keys() and "keypoints1_orig" in pred.keys():
377
+ plot_keypoints([pred["keypoints0_orig"], pred["keypoints1_orig"]])
378
+ text = (
379
+ f"# keypoints0: {len(pred['keypoints0_orig'])} \n"
380
+ + f"# keypoints1: {len(pred['keypoints1_orig'])}"
381
+ )
382
+ add_text(0, text, fs=15)
383
+ output_keypoints = fig2im(output_keypoints)
384
+ return output_keypoints
385
+
386
+
387
+ def display_matches(
388
+ pred: Dict[str, np.ndarray],
389
+ titles: List[str] = [],
390
+ texts: List[str] = [],
391
+ dpi: int = 300,
392
+ tag: str = "KPTS_RAW", # KPTS_RAW, KPTS_RANSAC, LINES_RAW, LINES_RANSAC,
393
+ ) -> Tuple[np.ndarray, int]:
394
+ """
395
+ Displays the matches between two images.
396
+
397
+ Args:
398
+ pred: Dictionary containing the original images and the matches.
399
+ titles: Optional titles for the plot.
400
+ dpi: Resolution of the plot.
401
+
402
+ Returns:
403
+ The resulting concatenated plot and the number of inliers.
404
+ """
405
+ img0 = pred["image0_orig"]
406
+ img1 = pred["image1_orig"]
407
+ num_inliers = 0
408
+ KPTS0_KEY = None
409
+ KPTS1_KEY = None
410
+ confid = None
411
+ if tag == "KPTS_RAW":
412
+ KPTS0_KEY = "mkeypoints0_orig"
413
+ KPTS1_KEY = "mkeypoints1_orig"
414
+ if "mconf" in pred:
415
+ confid = pred["mconf"]
416
+ elif tag == "KPTS_RANSAC":
417
+ KPTS0_KEY = "mmkeypoints0_orig"
418
+ KPTS1_KEY = "mmkeypoints1_orig"
419
+ if "mmconf" in pred:
420
+ confid = pred["mmconf"]
421
+ else:
422
+ # TODO: LINES_RAW, LINES_RANSAC
423
+ raise ValueError(f"Unknown tag: {tag}")
424
+ # draw raw matches
425
+ if (
426
+ KPTS0_KEY in pred
427
+ and KPTS1_KEY in pred
428
+ and pred[KPTS0_KEY] is not None
429
+ and pred[KPTS1_KEY] is not None
430
+ ): # draw ransac matches
431
+ mkpts0 = pred[KPTS0_KEY]
432
+ mkpts1 = pred[KPTS1_KEY]
433
+ num_inliers = len(mkpts0)
434
+ if confid is None:
435
+ confid = np.ones(len(mkpts0))
436
+ fig_mkpts = draw_matches_core(
437
+ mkpts0,
438
+ mkpts1,
439
+ img0,
440
+ img1,
441
+ confid,
442
+ dpi=dpi,
443
+ titles=titles,
444
+ texts=texts,
445
+ )
446
+ fig = fig_mkpts
447
+ elif (
448
+ "line0_orig" in pred
449
+ and "line1_orig" in pred
450
+ and pred["line0_orig"] is not None
451
+ and pred["line1_orig"] is not None
452
+ # and (tag == "LINES_RAW" or tag == "LINES_RANSAC")
453
+ ):
454
+ # lines
455
+ mtlines0 = pred["line0_orig"]
456
+ mtlines1 = pred["line1_orig"]
457
+ num_inliers = len(mtlines0)
458
+ fig_lines = plot_images(
459
+ [img0.squeeze(), img1.squeeze()],
460
+ ["Image 0 - matched lines", "Image 1 - matched lines"],
461
+ dpi=300,
462
+ )
463
+ fig_lines = plot_color_line_matches([mtlines0, mtlines1], lw=2)
464
+ fig_lines = fig2im(fig_lines)
465
+
466
+ # keypoints
467
+ mkpts0 = pred.get("line_keypoints0_orig")
468
+ mkpts1 = pred.get("line_keypoints1_orig")
469
+ fig = None
470
+ if mkpts0 is not None and mkpts1 is not None:
471
+ num_inliers = len(mkpts0)
472
+ if "mconf" in pred:
473
+ mconf = pred["mconf"]
474
+ else:
475
+ mconf = np.ones(len(mkpts0))
476
+ fig_mkpts = draw_matches_core(mkpts0, mkpts1, img0, img1, mconf, dpi=300)
477
+ fig_lines = cv2.resize(fig_lines, (fig_mkpts.shape[1], fig_mkpts.shape[0]))
478
+ fig = np.concatenate([fig_mkpts, fig_lines], axis=0)
479
+ else:
480
+ fig = fig_lines
481
+ return fig, num_inliers
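
A minimal usage sketch of the plotting helpers defined above, assuming they are imported from this module; the image arrays and confidences are synthetic stand-ins:

import numpy as np

img0 = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)  # placeholder RGB images
img1 = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
fig = plot_images([img0, img1], titles=["Image 0", "Image 1"], dpi=100)
canvas = fig2im(fig)                      # (H, W, 3) uint8 rasterization of the figure
conf = np.random.rand(200)                # hypothetical per-match confidences in [0, 1]
colors = error_colormap(1 - conf, 0.5, alpha=0.5)  # (200, 4) RGBA, greener = more confident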