Techt3o committed
Commit 20ae9ff · verified · 1 Parent(s): 9796b13

1c0fe995ac3bf6c4bc83a727a73c46ab2d045729fb0abd53c4c78cd2b8282877

Files changed (50)
  1. third-party/DPVO/dpvo.egg-info/PKG-INFO +11 -0
  2. third-party/DPVO/dpvo.egg-info/SOURCES.txt +50 -0
  3. third-party/DPVO/dpvo.egg-info/dependency_links.txt +1 -0
  4. third-party/DPVO/dpvo.egg-info/top_level.txt +4 -0
  5. third-party/DPVO/dpvo/data_readers/augmentation.py +66 -0
  6. third-party/DPVO/dpvo/data_readers/base.py +176 -0
  7. third-party/DPVO/dpvo/data_readers/factory.py +26 -0
  8. third-party/DPVO/dpvo/data_readers/frame_utils.py +164 -0
  9. third-party/DPVO/dpvo/data_readers/rgbd_utils.py +188 -0
  10. third-party/DPVO/dpvo/data_readers/tartan.py +110 -0
  11. third-party/DPVO/dpvo/data_readers/tartan_test.txt +32 -0
  12. third-party/DPVO/dpvo/dpvo.py +473 -0
  13. third-party/DPVO/dpvo/extractor.py +264 -0
  14. third-party/DPVO/dpvo/fastba/__init__.py +1 -0
  15. third-party/DPVO/dpvo/fastba/ba.cpp +189 -0
  16. third-party/DPVO/dpvo/fastba/ba.py +8 -0
  17. third-party/DPVO/dpvo/fastba/ba_cuda.cu +617 -0
  18. third-party/DPVO/dpvo/fastba/block_e.cu +300 -0
  19. third-party/DPVO/dpvo/fastba/block_e.cuh +26 -0
  20. third-party/DPVO/dpvo/lietorch/__init__.py +2 -0
  21. third-party/DPVO/dpvo/lietorch/broadcasting.py +31 -0
  22. third-party/DPVO/dpvo/lietorch/gradcheck.py +592 -0
  23. third-party/DPVO/dpvo/lietorch/group_ops.py +102 -0
  24. third-party/DPVO/dpvo/lietorch/groups.py +322 -0
  25. third-party/DPVO/dpvo/lietorch/include/common.h +12 -0
  26. third-party/DPVO/dpvo/lietorch/include/dispatch.h +48 -0
  27. third-party/DPVO/dpvo/lietorch/include/lietorch_cpu.h +51 -0
  28. third-party/DPVO/dpvo/lietorch/include/lietorch_gpu.h +51 -0
  29. third-party/DPVO/dpvo/lietorch/include/rxso3.h +324 -0
  30. third-party/DPVO/dpvo/lietorch/include/se3.h +229 -0
  31. third-party/DPVO/dpvo/lietorch/include/sim3.h +217 -0
  32. third-party/DPVO/dpvo/lietorch/include/so3.h +229 -0
  33. third-party/DPVO/dpvo/lietorch/run_tests.py +302 -0
  34. third-party/DPVO/dpvo/lietorch/src/lietorch.cpp +317 -0
  35. third-party/DPVO/dpvo/lietorch/src/lietorch_cpu.cpp +657 -0
  36. third-party/DPVO/dpvo/lietorch/src/lietorch_gpu.cu +601 -0
  37. third-party/DPVO/dpvo/logger.py +58 -0
  38. third-party/DPVO/dpvo/loop_closure/long_term.py +267 -0
  39. third-party/DPVO/dpvo/loop_closure/optim_utils.py +243 -0
  40. third-party/DPVO/dpvo/loop_closure/retrieval/__init__.py +2 -0
  41. third-party/DPVO/dpvo/loop_closure/retrieval/image_cache.py +72 -0
  42. third-party/DPVO/dpvo/loop_closure/retrieval/retrieval_dbow.py +125 -0
  43. third-party/DPVO/dpvo/net.py +273 -0
  44. third-party/DPVO/dpvo/patchgraph.py +111 -0
  45. third-party/DPVO/dpvo/plot_utils.py +64 -0
  46. third-party/DPVO/dpvo/projective_ops.py +130 -0
  47. third-party/DPVO/dpvo/stream.py +89 -0
  48. third-party/DPVO/dpvo/utils.py +88 -0
  49. third-party/DPVO/environment.yml +26 -0
  50. third-party/DPVO/evaluate_euroc.py +145 -0
third-party/DPVO/dpvo.egg-info/PKG-INFO ADDED
@@ -0,0 +1,11 @@
+ Metadata-Version: 2.1
+ Name: dpvo
+ Version: 0.0.0
+ Summary: UNKNOWN
+ Home-page: UNKNOWN
+ License: UNKNOWN
+ Platform: UNKNOWN
+ License-File: LICENSE
+
+ UNKNOWN
+
third-party/DPVO/dpvo.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,50 @@
+ LICENSE
+ README.md
+ setup.py
+ dpvo/__init__.py
+ dpvo/ba.py
+ dpvo/blocks.py
+ dpvo/config.py
+ dpvo/dpvo.py
+ dpvo/extractor.py
+ dpvo/logger.py
+ dpvo/net.py
+ dpvo/plot_utils.py
+ dpvo/projective_ops.py
+ dpvo/stream.py
+ dpvo/utils.py
+ dpvo.egg-info/PKG-INFO
+ dpvo.egg-info/SOURCES.txt
+ dpvo.egg-info/dependency_links.txt
+ dpvo.egg-info/top_level.txt
+ dpvo/altcorr/correlation.cpp
+ dpvo/altcorr/correlation_kernel.cu
+ dpvo/fastba/ba.cpp
+ dpvo/fastba/ba_cuda.cu
+ dpvo/lietorch/src/lietorch.cpp
+ dpvo/lietorch/src/lietorch_cpu.cpp
+ dpvo/lietorch/src/lietorch_gpu.cu
+ dpvo/altcorr/__init__.py
+ dpvo/altcorr/correlation.cpp
+ dpvo/altcorr/correlation.py
+ dpvo/altcorr/correlation_kernel.cu
+ dpvo/data_readers/__init__.py
+ dpvo/data_readers/augmentation.py
+ dpvo/data_readers/base.py
+ dpvo/data_readers/factory.py
+ dpvo/data_readers/frame_utils.py
+ dpvo/data_readers/rgbd_utils.py
+ dpvo/data_readers/tartan.py
+ dpvo/fastba/__init__.py
+ dpvo/fastba/ba.cpp
+ dpvo/fastba/ba.py
+ dpvo/fastba/ba_cuda.cu
+ dpvo/lietorch/__init__.py
+ dpvo/lietorch/broadcasting.py
+ dpvo/lietorch/gradcheck.py
+ dpvo/lietorch/group_ops.py
+ dpvo/lietorch/groups.py
+ dpvo/lietorch/run_tests.py
+ dpvo/lietorch/src/lietorch.cpp
+ dpvo/lietorch/src/lietorch_cpu.cpp
+ dpvo/lietorch/src/lietorch_gpu.cu
third-party/DPVO/dpvo.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
+
third-party/DPVO/dpvo.egg-info/top_level.txt ADDED
@@ -0,0 +1,4 @@
+ cuda_ba
+ cuda_corr
+ dpvo
+ lietorch_backends
third-party/DPVO/dpvo/data_readers/augmentation.py ADDED
@@ -0,0 +1,66 @@
1
+ import torch
2
+ import torchvision.transforms as transforms
3
+ import numpy as np
4
+ import torch.nn.functional as F
5
+
6
+
7
+ class RGBDAugmentor:
8
+ """ perform augmentation on RGB-D video """
9
+
10
+ def __init__(self, crop_size):
11
+ self.crop_size = crop_size
12
+ self.augcolor = transforms.Compose([
13
+ transforms.ToPILImage(),
14
+ transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2/3.14),
15
+ transforms.RandomGrayscale(p=0.1),
16
+ transforms.RandomInvert(p=0.1),
17
+ transforms.ToTensor()])
18
+
19
+ self.max_scale = 0.5
20
+
21
+ def spatial_transform(self, images, depths, poses, intrinsics):
22
+ """ cropping and resizing """
23
+ ht, wd = images.shape[2:]
24
+
25
+ max_scale = self.max_scale
26
+ min_scale = np.log2(np.maximum(
27
+ (self.crop_size[0] + 1) / float(ht),
28
+ (self.crop_size[1] + 1) / float(wd)))
29
+
30
+ scale = 1
31
+ if np.random.rand() < 0.8:
32
+ scale = 2 ** np.random.uniform(0.0, max_scale)
33
+
34
+ intrinsics = scale * intrinsics
35
+
36
+ ht1 = int(scale * ht)
37
+ wd1 = int(scale * wd)
38
+
39
+ depths = depths.unsqueeze(dim=1)
40
+
41
+ images = F.interpolate(images, (ht1, wd1), mode='bicubic', align_corners=False)
42
+ depths = F.interpolate(depths, (ht1, wd1), recompute_scale_factor=False)
43
+
44
+ # always perform center crop (TODO: try non-center crops)
45
+ y0 = (images.shape[2] - self.crop_size[0]) // 2
46
+ x0 = (images.shape[3] - self.crop_size[1]) // 2
47
+
48
+ intrinsics = intrinsics - torch.tensor([0.0, 0.0, x0, y0])
49
+ images = images[:, :, y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
50
+ depths = depths[:, :, y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
51
+
52
+ depths = depths.squeeze(dim=1)
53
+ return images, poses, depths, intrinsics
54
+
55
+ def color_transform(self, images):
56
+ """ color jittering """
57
+ num, ch, ht, wd = images.shape
58
+ images = images.permute(1, 2, 3, 0).reshape(ch, ht, wd*num)
59
+ images = 255 * self.augcolor(images[[2,1,0]] / 255.0)
60
+ return images[[2,1,0]].reshape(ch, ht, wd, num).permute(3,0,1,2).contiguous()
61
+
62
+ def __call__(self, images, poses, depths, intrinsics):
63
+ if np.random.rand() < 0.5:
64
+ images = self.color_transform(images)
65
+
66
+ return self.spatial_transform(images, depths, poses, intrinsics)
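For orientation, a minimal sketch of how RGBDAugmentor might be exercised on a synthetic clip; the clip length, resolution, crop size, and value ranges below are illustrative assumptions (not taken from this commit), and the import assumes the dpvo package is on the path.

import torch
from dpvo.data_readers.augmentation import RGBDAugmentor

# hypothetical 4-frame RGB-D clip: images (N, 3, H, W) in [0, 255], depths (N, H, W) > 0
images = torch.randint(0, 256, (4, 3, 480, 640)).float()
depths = torch.rand(4, 480, 640) + 0.5
poses = torch.zeros(4, 7); poses[:, 6] = 1.0                   # identity poses, x y z qx qy qz qw
intrinsics = torch.tensor([320.0, 320.0, 320.0, 240.0]).repeat(4, 1)

aug = RGBDAugmentor(crop_size=[336, 448])                      # crop size chosen arbitrarily
images, poses, disps, intrinsics = aug(images, poses, 1.0 / depths, intrinsics)
print(images.shape, disps.shape)                               # (4, 3, 336, 448), (4, 336, 448)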
third-party/DPVO/dpvo/data_readers/base.py ADDED
@@ -0,0 +1,176 @@
1
+ import numpy as np
2
+ import torch
3
+ import torch.utils.data as data
4
+ import torch.nn.functional as F
5
+
6
+ import csv
7
+ import os
8
+ import cv2
9
+ import math
10
+ import random
11
+ import json
12
+ import pickle
13
+ import os.path as osp
14
+
15
+ from .augmentation import RGBDAugmentor
16
+ from .rgbd_utils import *
17
+
18
+ class RGBDDataset(data.Dataset):
19
+ def __init__(self, name, datapath, n_frames=4, crop_size=[480,640], fmin=10.0, fmax=75.0, aug=True, sample=True):
20
+ """ Base class for RGBD dataset """
21
+ self.aug = None
22
+ self.root = datapath
23
+ self.name = name
24
+
25
+ self.aug = aug
26
+ self.sample = sample
27
+
28
+ self.n_frames = n_frames
29
+ self.fmin = fmin # exclude very easy examples
30
+ self.fmax = fmax # exclude very hard examples
31
+
32
+ if self.aug:
33
+ self.aug = RGBDAugmentor(crop_size=crop_size)
34
+
35
+ # building dataset is expensive, cache so only needs to be performed once
36
+ cur_path = osp.dirname(osp.abspath(__file__))
37
+ if not os.path.isdir(osp.join(cur_path, 'cache')):
38
+ os.mkdir(osp.join(cur_path, 'cache'))
39
+
40
+ self.scene_info = \
41
+ pickle.load(open('datasets/TartanAir.pickle', 'rb'))[0]
42
+
43
+ self._build_dataset_index()
44
+
45
+ def _build_dataset_index(self):
46
+ self.dataset_index = []
47
+ for scene in self.scene_info:
48
+ if not self.__class__.is_test_scene(scene):
49
+ graph = self.scene_info[scene]['graph']
50
+ for i in graph:
51
+ if i < len(graph) - 65:
52
+ self.dataset_index.append((scene, i))
53
+ else:
54
+ print("Reserving {} for validation".format(scene))
55
+
56
+ @staticmethod
57
+ def image_read(image_file):
58
+ return cv2.imread(image_file)
59
+
60
+ @staticmethod
61
+ def depth_read(depth_file):
62
+ return np.load(depth_file)
63
+
64
+ def build_frame_graph(self, poses, depths, intrinsics, f=16, max_flow=256):
65
+ """ compute optical flow distance between all pairs of frames """
66
+ def read_disp(fn):
67
+ depth = self.__class__.depth_read(fn)[f//2::f, f//2::f]
68
+ depth[depth < 0.01] = np.mean(depth)
69
+ return 1.0 / depth
70
+
71
+ poses = np.array(poses)
72
+ intrinsics = np.array(intrinsics) / f
73
+
74
+ disps = np.stack(list(map(read_disp, depths)), 0)
75
+ d = f * compute_distance_matrix_flow(poses, disps, intrinsics)
76
+
77
+ graph = {}
78
+ for i in range(d.shape[0]):
79
+ j, = np.where(d[i] < max_flow)
80
+ graph[i] = (j, d[i,j])
81
+
82
+ return graph
83
+
84
+ def __getitem__(self, index):
85
+ """ return training video """
86
+
87
+ index = index % len(self.dataset_index)
88
+ scene_id, ix = self.dataset_index[index]
89
+
90
+ frame_graph = self.scene_info[scene_id]['graph']
91
+ images_list = self.scene_info[scene_id]['images']
92
+ depths_list = self.scene_info[scene_id]['depths']
93
+ poses_list = self.scene_info[scene_id]['poses']
94
+ intrinsics_list = self.scene_info[scene_id]['intrinsics']
95
+
96
+ # stride = np.random.choice([1,2,3])
97
+
98
+ d = np.random.uniform(self.fmin, self.fmax)
99
+ s = 1
100
+
101
+ inds = [ ix ]
102
+
103
+ while len(inds) < self.n_frames:
104
+ # get other frames within flow threshold
105
+
106
+ if self.sample:
107
+ k = (frame_graph[ix][1] > self.fmin) & (frame_graph[ix][1] < self.fmax)
108
+ frames = frame_graph[ix][0][k]
109
+
110
+ # prefer frames forward in time
111
+ if np.count_nonzero(frames[frames > ix]):
112
+ ix = np.random.choice(frames[frames > ix])
113
+
114
+ elif ix + 1 < len(images_list):
115
+ ix = ix + 1
116
+
117
+ elif np.count_nonzero(frames):
118
+ ix = np.random.choice(frames)
119
+
120
+ else:
121
+ i = frame_graph[ix][0].copy()
122
+ g = frame_graph[ix][1].copy()
123
+
124
+ g[g > d] = -1
125
+ if s > 0:
126
+ g[i <= ix] = -1
127
+ else:
128
+ g[i >= ix] = -1
129
+
130
+ if len(g) > 0 and np.max(g) > 0:
131
+ ix = i[np.argmax(g)]
132
+ else:
133
+ if ix + s >= len(images_list) or ix + s < 0:
134
+ s *= -1
135
+
136
+ ix = ix + s
137
+
138
+ inds += [ ix ]
139
+
140
+
141
+ images, depths, poses, intrinsics = [], [], [], []
142
+ for i in inds:
143
+ images.append(self.__class__.image_read(images_list[i]))
144
+ depths.append(self.__class__.depth_read(depths_list[i]))
145
+ poses.append(poses_list[i])
146
+ intrinsics.append(intrinsics_list[i])
147
+
148
+ images = np.stack(images).astype(np.float32)
149
+ depths = np.stack(depths).astype(np.float32)
150
+ poses = np.stack(poses).astype(np.float32)
151
+ intrinsics = np.stack(intrinsics).astype(np.float32)
152
+
153
+ images = torch.from_numpy(images).float()
154
+ images = images.permute(0, 3, 1, 2)
155
+
156
+ disps = torch.from_numpy(1.0 / depths)
157
+ poses = torch.from_numpy(poses)
158
+ intrinsics = torch.from_numpy(intrinsics)
159
+
160
+ if self.aug:
161
+ images, poses, disps, intrinsics = \
162
+ self.aug(images, poses, disps, intrinsics)
163
+
164
+ # normalize depth
165
+ s = .7 * torch.quantile(disps, .98)
166
+ disps = disps / s
167
+ poses[...,:3] *= s
168
+
169
+ return images, poses, disps, intrinsics
170
+
171
+ def __len__(self):
172
+ return len(self.dataset_index)
173
+
174
+ def __imul__(self, x):
175
+ self.dataset_index *= x
176
+ return self
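A self-contained illustration of the frame-graph format consumed above (graph[i] maps a frame to its co-visible neighbours and their mean flow magnitudes) and of the co-visibility filter applied in __getitem__; all numbers are invented.

import numpy as np

fmin, fmax = 10.0, 75.0                          # same defaults as RGBDDataset.__init__
# graph[i] = (neighbour frame indices, mean induced-flow magnitude to each neighbour)
graph = {0: (np.array([1, 2, 3, 4]), np.array([6.0, 18.5, 42.0, 90.0]))}

ix = 0
neighbours, flow = graph[ix]
k = (flow > fmin) & (flow < fmax)                # drop too-easy and too-hard pairs
print(neighbours[k])                             # [2 3]: frame 1 is too easy, frame 4 too hard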
third-party/DPVO/dpvo/data_readers/factory.py ADDED
@@ -0,0 +1,26 @@
1
+
2
+ import pickle
3
+ import os
4
+ import os.path as osp
5
+
6
+ # RGBD-Dataset
7
+ from .tartan import TartanAir
8
+
9
+ def dataset_factory(dataset_list, **kwargs):
10
+ """ create a combined dataset """
11
+
12
+ from torch.utils.data import ConcatDataset
13
+
14
+ dataset_map = {
15
+ 'tartan': (TartanAir, ),
16
+ }
17
+
18
+ db_list = []
19
+ for key in dataset_list:
20
+ # cache datasets for faster future loading
21
+ db = dataset_map[key][0](**kwargs)
22
+
23
+ print("Dataset {} has {} images".format(key, len(db)))
24
+ db_list.append(db)
25
+
26
+ return ConcatDataset(db_list)
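A sketch of how dataset_factory might be called from a training script. The datapath, batch size, and worker count are placeholders, and the call assumes the cached datasets/TartanAir.pickle index loaded by RGBDDataset already exists.

from torch.utils.data import DataLoader
from dpvo.data_readers.factory import dataset_factory

db = dataset_factory(['tartan'], datapath='datasets/TartanAir', n_frames=4)
loader = DataLoader(db, batch_size=2, shuffle=True, num_workers=4)

# each item: images (n_frames, 3, H, W), poses (n_frames, 7),
#            disps (n_frames, H, W), intrinsics (n_frames, 4)
images, poses, disps, intrinsics = next(iter(loader))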
third-party/DPVO/dpvo/data_readers/frame_utils.py ADDED
@@ -0,0 +1,164 @@
1
+ import numpy as np
2
+ from PIL import Image
3
+ from os.path import *
4
+ import re
5
+ import cv2
6
+ cv2.setNumThreads(0)
+ from scipy.spatial.transform import Rotation  # missing in the original file; cam_read() below uses Rotation
7
+
8
+
9
+ TAG_CHAR = np.array([202021.25], np.float32)
10
+
11
+ def readFlowKITTI(filename):
12
+ flow = cv2.imread(filename, cv2.IMREAD_ANYDEPTH|cv2.IMREAD_COLOR)
13
+ flow = flow[:,:,::-1].astype(np.float32)
14
+ flow, valid = flow[:, :, :2], flow[:, :, 2]
15
+ flow = (flow - 2**15) / 64.0
16
+ return flow, valid
17
+
18
+ def readFlow(fn):
19
+ """ Read .flo file in Middlebury format"""
20
+ # Code adapted from:
21
+ # http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy
22
+
23
+ # WARNING: this will work on little-endian architectures (eg Intel x86) only!
24
+ # print 'fn = %s'%(fn)
25
+ with open(fn, 'rb') as f:
26
+ magic = np.fromfile(f, np.float32, count=1)
27
+ if 202021.25 != magic:
28
+ print('Magic number incorrect. Invalid .flo file')
29
+ return None
30
+ else:
31
+ w = np.fromfile(f, np.int32, count=1)
32
+ h = np.fromfile(f, np.int32, count=1)
33
+ # print 'Reading %d x %d flo file\n' % (w, h)
34
+ data = np.fromfile(f, np.float32, count=2*int(w)*int(h))
35
+ # Reshape data into 3D array (columns, rows, bands)
36
+ # The reshape here is for visualization, the original code is (w,h,2)
37
+ return np.resize(data, (int(h), int(w), 2))
38
+
39
+ def readPFM(file):
40
+ file = open(file, 'rb')
41
+
42
+ color = None
43
+ width = None
44
+ height = None
45
+ scale = None
46
+ endian = None
47
+
48
+ header = file.readline().rstrip()
49
+ if header == b'PF':
50
+ color = True
51
+ elif header == b'Pf':
52
+ color = False
53
+ else:
54
+ raise Exception('Not a PFM file.')
55
+
56
+ try:
57
+ dim_match = re.match(rb'^(\d+)\s(\d+)\s$', file.readline())
58
+ except:
59
+ dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline())
60
+
61
+ if dim_match:
62
+ width, height = map(int, dim_match.groups())
63
+ else:
64
+ raise Exception('Malformed PFM header.')
65
+
66
+ scale = float(file.readline().rstrip())
67
+ if scale < 0: # little-endian
68
+ endian = '<'
69
+ scale = -scale
70
+ else:
71
+ endian = '>' # big-endian
72
+
73
+ data = np.fromfile(file, endian + 'f')
74
+ shape = (height, width, 3) if color else (height, width)
75
+
76
+ data = np.reshape(data, shape)
77
+ data = np.flipud(data)
78
+ return data
79
+
80
+
81
+ def writeFlow(filename,uv,v=None):
82
+ """ Write optical flow to file.
83
+
84
+ If v is None, uv is assumed to contain both u and v channels,
85
+ stacked in depth.
86
+ Original code by Deqing Sun, adapted from Daniel Scharstein.
87
+ """
88
+ nBands = 2
89
+
90
+ if v is None:
91
+ assert(uv.ndim == 3)
92
+ assert(uv.shape[2] == 2)
93
+ u = uv[:,:,0]
94
+ v = uv[:,:,1]
95
+ else:
96
+ u = uv
97
+
98
+ assert(u.shape == v.shape)
99
+ height,width = u.shape
100
+ f = open(filename,'wb')
101
+ # write the header
102
+ f.write(TAG_CHAR)
103
+ np.array(width).astype(np.int32).tofile(f)
104
+ np.array(height).astype(np.int32).tofile(f)
105
+ # arrange into matrix form
106
+ tmp = np.zeros((height, width*nBands))
107
+ tmp[:,np.arange(width)*2] = u
108
+ tmp[:,np.arange(width)*2 + 1] = v
109
+ tmp.astype(np.float32).tofile(f)
110
+ f.close()
111
+
112
+
113
+ def readDPT(filename):
114
+ """ Read depth data from file, return as numpy array. """
115
+ f = open(filename,'rb')
116
+ check = np.fromfile(f,dtype=np.float32,count=1)[0]
117
+ TAG_FLOAT = 202021.25
118
+ TAG_CHAR = 'PIEH'
119
+ assert check == TAG_FLOAT, ' depth_read:: Wrong tag in flow file (should be: {0}, is: {1}). Big-endian machine? '.format(TAG_FLOAT,check)
120
+ width = np.fromfile(f,dtype=np.int32,count=1)[0]
121
+ height = np.fromfile(f,dtype=np.int32,count=1)[0]
122
+ size = width*height
123
+ assert width > 0 and height > 0 and size > 1 and size < 100000000, ' depth_read:: Wrong input size (width = {0}, height = {1}).'.format(width,height)
124
+ depth = np.fromfile(f,dtype=np.float32,count=-1).reshape((height,width))
125
+ return depth
126
+
127
+ def cam_read(filename):
128
+ """ Read camera data, return (M,N) tuple.
129
+ M is the intrinsic matrix, N is the extrinsic matrix, so that
130
+ x = M*N*X,
131
+ with x being a point in homogeneous image pixel coordinates, X being a
132
+ point in homogeneous world coordinates."""
133
+ f = open(filename,'rb')
134
+ check = np.fromfile(f,dtype=np.float32,count=1)[0]
135
+ M = np.fromfile(f,dtype='float64',count=9).reshape((3,3))
136
+ N = np.fromfile(f,dtype='float64',count=12).reshape((3,4))
137
+
138
+ E = np.eye(4)
139
+ E[0:3,:] = N
140
+
141
+ fx, fy, cx, cy = M[0,0], M[1,1], M[0,2], M[1,2]
142
+ kvec = np.array([fx, fy, cx, cy])
143
+
144
+ q = Rotation.from_matrix(E[:3,:3]).as_quat()
145
+ pvec = np.concatenate([E[:3,3], q], 0)
146
+
147
+ return pvec, kvec
148
+
149
+
150
+ def read_gen(file_name, pil=False):
151
+ ext = splitext(file_name)[-1]
152
+ if ext == '.png' or ext == '.jpeg' or ext == '.ppm' or ext == '.jpg':
153
+ return Image.open(file_name)
154
+ elif ext == '.bin' or ext == '.raw':
155
+ return np.load(file_name)
156
+ elif ext == '.flo':
157
+ return readFlow(file_name).astype(np.float32)
158
+ elif ext == '.pfm':
159
+ return readPFM(file_name).astype(np.float32)
160
+ elif ext == '.dpt':
161
+ return readDPT(file_name).astype(np.float32)
162
+ elif ext == '.cam':
163
+ return cam_read(file_name)
164
+ return []
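A quick round-trip check of the Middlebury .flo helpers above; the temporary path and array size are arbitrary, and readFlow assumes a little-endian machine.

import os, tempfile
import numpy as np
from dpvo.data_readers.frame_utils import writeFlow, readFlow

flow = np.random.randn(48, 64, 2).astype(np.float32)    # (height, width, 2) u/v field
path = os.path.join(tempfile.mkdtemp(), 'example.flo')
writeFlow(path, flow)
print(np.allclose(readFlow(path), flow))                 # True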
third-party/DPVO/dpvo/data_readers/rgbd_utils.py ADDED
@@ -0,0 +1,188 @@
1
+ import numpy as np
2
+ import os.path as osp
3
+
4
+ import torch
5
+ from ..lietorch import SE3
6
+
7
+ from scipy.spatial.transform import Rotation
+ from .. import projective_ops as pops  # not imported in the original file; the flow-distance functions below call pops.induced_flow (assumed to be provided by dpvo.projective_ops)
8
+
9
+ def parse_list(filepath, skiprows=0):
10
+ """ read list data """
11
+ data = np.loadtxt(filepath, delimiter=' ', dtype=np.unicode_, skiprows=skiprows)
12
+ return data
13
+
14
+ def associate_frames(tstamp_image, tstamp_depth, tstamp_pose, max_dt=1.0):
15
+ """ pair images, depths, and poses """
16
+ associations = []
17
+ for i, t in enumerate(tstamp_image):
18
+ if tstamp_pose is None:
19
+ j = np.argmin(np.abs(tstamp_depth - t))
20
+ if (np.abs(tstamp_depth[j] - t) < max_dt):
21
+ associations.append((i, j))
22
+
23
+ else:
24
+ j = np.argmin(np.abs(tstamp_depth - t))
25
+ k = np.argmin(np.abs(tstamp_pose - t))
26
+
27
+ if (np.abs(tstamp_depth[j] - t) < max_dt) and \
28
+ (np.abs(tstamp_pose[k] - t) < max_dt):
29
+ associations.append((i, j, k))
30
+
31
+ return associations
32
+
33
+ def loadtum(datapath, frame_rate=-1):
34
+ """ read video data in tum-rgbd format """
35
+ if osp.isfile(osp.join(datapath, 'groundtruth.txt')):
36
+ pose_list = osp.join(datapath, 'groundtruth.txt')
37
+
38
+ elif osp.isfile(osp.join(datapath, 'pose.txt')):
39
+ pose_list = osp.join(datapath, 'pose.txt')
40
+
41
+ else:
42
+ return None, None, None, None
43
+
44
+ image_list = osp.join(datapath, 'rgb.txt')
45
+ depth_list = osp.join(datapath, 'depth.txt')
46
+
47
+ calib_path = osp.join(datapath, 'calibration.txt')
48
+ intrinsic = None
49
+ if osp.isfile(calib_path):
50
+ intrinsic = np.loadtxt(calib_path, delimiter=' ')
51
+ intrinsic = intrinsic.astype(np.float64)
52
+
53
+ image_data = parse_list(image_list)
54
+ depth_data = parse_list(depth_list)
55
+ pose_data = parse_list(pose_list, skiprows=1)
56
+ pose_vecs = pose_data[:,1:].astype(np.float64)
57
+
58
+ tstamp_image = image_data[:,0].astype(np.float64)
59
+ tstamp_depth = depth_data[:,0].astype(np.float64)
60
+ tstamp_pose = pose_data[:,0].astype(np.float64)
61
+ associations = associate_frames(tstamp_image, tstamp_depth, tstamp_pose)
62
+
63
+ # print(len(tstamp_image))
64
+ # print(len(associations))
65
+
66
+ indicies = range(len(associations))[::5]
67
+
68
+ # indicies = [ 0 ]
69
+ # for i in range(1, len(associations)):
70
+ # t0 = tstamp_image[associations[indicies[-1]][0]]
71
+ # t1 = tstamp_image[associations[i][0]]
72
+ # if t1 - t0 > 1.0 / frame_rate:
73
+ # indicies += [ i ]
74
+
75
+ images, poses, depths, intrinsics, tstamps = [], [], [], [], []
76
+ for ix in indicies:
77
+ (i, j, k) = associations[ix]
78
+ images += [ osp.join(datapath, image_data[i,1]) ]
79
+ depths += [ osp.join(datapath, depth_data[j,1]) ]
80
+ poses += [ pose_vecs[k] ]
81
+ tstamps += [ tstamp_image[i] ]
82
+
83
+ if intrinsic is not None:
84
+ intrinsics += [ intrinsic ]
85
+
86
+ return images, depths, poses, intrinsics, tstamps
87
+
88
+
89
+ def all_pairs_distance_matrix(poses, beta=2.5):
90
+ """ compute distance matrix between all pairs of poses """
91
+ poses = np.array(poses, dtype=np.float32)
92
+ poses[:,:3] *= beta # scale to balance rot + trans
93
+ poses = SE3(torch.from_numpy(poses))
94
+
95
+ r = (poses[:,None].inv() * poses[None,:]).log()
96
+ return r.norm(dim=-1).cpu().numpy()
97
+
98
+ def pose_matrix_to_quaternion(pose):
99
+ """ convert 4x4 pose matrix to (t, q) """
100
+ q = Rotation.from_matrix(pose[:3, :3]).as_quat()
101
+ return np.concatenate([pose[:3, 3], q], axis=0)
102
+
103
+ def compute_distance_matrix_flow(poses, disps, intrinsics):
104
+ """ compute flow magnitude between all pairs of frames """
105
+ if not isinstance(poses, SE3):
106
+ poses = torch.from_numpy(poses).float().cuda()[None]
107
+ poses = SE3(poses).inv()
108
+
109
+ disps = torch.from_numpy(disps).float().cuda()[None]
110
+ intrinsics = torch.from_numpy(intrinsics).float().cuda()[None]
111
+
112
+ N = poses.shape[1]
113
+
114
+ ii, jj = torch.meshgrid(torch.arange(N), torch.arange(N))
115
+ ii = ii.reshape(-1).cuda()
116
+ jj = jj.reshape(-1).cuda()
117
+
118
+ MAX_FLOW = 100.0
119
+ matrix = np.zeros((N, N), dtype=np.float32)
120
+
121
+ s = 2048
122
+ for i in range(0, ii.shape[0], s):
123
+ flow1, val1 = pops.induced_flow(poses, disps, intrinsics, ii[i:i+s], jj[i:i+s])
124
+ flow2, val2 = pops.induced_flow(poses, disps, intrinsics, jj[i:i+s], ii[i:i+s])
125
+
126
+ flow = torch.stack([flow1, flow2], dim=2)
127
+ val = torch.stack([val1, val2], dim=2)
128
+
129
+ mag = flow.norm(dim=-1).clamp(max=MAX_FLOW)
130
+ mag = mag.view(mag.shape[1], -1)
131
+ val = val.view(val.shape[1], -1)
132
+
133
+ mag = (mag * val).mean(-1) / val.mean(-1)
134
+ mag[val.mean(-1) < 0.7] = np.inf
135
+
136
+ i1 = ii[i:i+s].cpu().numpy()
137
+ j1 = jj[i:i+s].cpu().numpy()
138
+ matrix[i1, j1] = mag.cpu().numpy()
139
+
140
+ return matrix
141
+
142
+
143
+ def compute_distance_matrix_flow2(poses, disps, intrinsics, beta=0.4):
144
+ """ compute flow magnitude between all pairs of frames """
145
+ # if not isinstance(poses, SE3):
146
+ # poses = torch.from_numpy(poses).float().cuda()[None]
147
+ # poses = SE3(poses).inv()
148
+
149
+ # disps = torch.from_numpy(disps).float().cuda()[None]
150
+ # intrinsics = torch.from_numpy(intrinsics).float().cuda()[None]
151
+
152
+ N = poses.shape[1]
153
+
154
+ ii, jj = torch.meshgrid(torch.arange(N), torch.arange(N))
155
+ ii = ii.reshape(-1)
156
+ jj = jj.reshape(-1)
157
+
158
+ MAX_FLOW = 128.0
159
+ matrix = np.zeros((N, N), dtype=np.float32)
160
+
161
+ s = 2048
162
+ for i in range(0, ii.shape[0], s):
163
+ flow1a, val1a = pops.induced_flow(poses, disps, intrinsics, ii[i:i+s], jj[i:i+s], tonly=True)
164
+ flow1b, val1b = pops.induced_flow(poses, disps, intrinsics, ii[i:i+s], jj[i:i+s])
165
+ flow2a, val2a = pops.induced_flow(poses, disps, intrinsics, jj[i:i+s], ii[i:i+s], tonly=True)
166
+ flow2b, val2b = pops.induced_flow(poses, disps, intrinsics, ii[i:i+s], jj[i:i+s])
167
+
168
+ flow1 = flow1a + beta * flow1b
169
+ val1 = val1a * val2b
170
+
171
+ flow2 = flow2a + beta * flow2b
172
+ val2 = val2a * val2b
173
+
174
+ flow = torch.stack([flow1, flow2], dim=2)
175
+ val = torch.stack([val1, val2], dim=2)
176
+
177
+ mag = flow.norm(dim=-1).clamp(max=MAX_FLOW)
178
+ mag = mag.view(mag.shape[1], -1)
179
+ val = val.view(val.shape[1], -1)
180
+
181
+ mag = (mag * val).mean(-1) / val.mean(-1)
182
+ mag[val.mean(-1) < 0.8] = np.inf
183
+
184
+ i1 = ii[i:i+s].cpu().numpy()
185
+ j1 = jj[i:i+s].cpu().numpy()
186
+ matrix[i1, j1] = mag.cpu().numpy()
187
+
188
+ return matrix
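A small example of the pose-distance helper above; it needs the compiled lietorch extension from this repo, and the three poses are invented (x y z qx qy qz qw).

import numpy as np
from dpvo.data_readers.rgbd_utils import all_pairs_distance_matrix

poses = np.array([
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0,    1.0],      # identity
    [1.0, 0.0, 0.0, 0.0, 0.0, 0.0,    1.0],      # translated 1 m along x
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.7071, 0.7071],   # rotated ~90 deg about z
], dtype=np.float32)

D = all_pairs_distance_matrix(poses)   # (3, 3) geodesic distances, translation weighted by beta=2.5
print(D.round(2))                      # zeros on the diagonal, symmetric off-diagonal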
third-party/DPVO/dpvo/data_readers/tartan.py ADDED
@@ -0,0 +1,110 @@
1
+
2
+ import numpy as np
3
+ import torch
4
+ import glob
5
+ import cv2
6
+ import os
7
+ import os.path as osp
8
+
9
+ from ..lietorch import SE3
10
+ from .base import RGBDDataset
11
+
12
+ # cur_path = osp.dirname(osp.abspath(__file__))
13
+ # test_split = osp.join(cur_path, 'tartan_test.txt')
14
+ # test_split = open(test_split).read().split()
15
+
16
+
17
+ test_split = [
18
+ "abandonedfactory/abandonedfactory/Easy/P011",
19
+ "abandonedfactory/abandonedfactory/Hard/P011",
20
+ "abandonedfactory_night/abandonedfactory_night/Easy/P013",
21
+ "abandonedfactory_night/abandonedfactory_night/Hard/P014",
22
+ "amusement/amusement/Easy/P008",
23
+ "amusement/amusement/Hard/P007",
24
+ "carwelding/carwelding/Easy/P007",
25
+ "endofworld/endofworld/Easy/P009",
26
+ "gascola/gascola/Easy/P008",
27
+ "gascola/gascola/Hard/P009",
28
+ "hospital/hospital/Easy/P036",
29
+ "hospital/hospital/Hard/P049",
30
+ "japanesealley/japanesealley/Easy/P007",
31
+ "japanesealley/japanesealley/Hard/P005",
32
+ "neighborhood/neighborhood/Easy/P021",
33
+ "neighborhood/neighborhood/Hard/P017",
34
+ "ocean/ocean/Easy/P013",
35
+ "ocean/ocean/Hard/P009",
36
+ "office2/office2/Easy/P011",
37
+ "office2/office2/Hard/P010",
38
+ "office/office/Hard/P007",
39
+ "oldtown/oldtown/Easy/P007",
40
+ "oldtown/oldtown/Hard/P008",
41
+ "seasidetown/seasidetown/Easy/P009",
42
+ "seasonsforest/seasonsforest/Easy/P011",
43
+ "seasonsforest/seasonsforest/Hard/P006",
44
+ "seasonsforest_winter/seasonsforest_winter/Easy/P009",
45
+ "seasonsforest_winter/seasonsforest_winter/Hard/P018",
46
+ "soulcity/soulcity/Easy/P012",
47
+ "soulcity/soulcity/Hard/P009",
48
+ "westerndesert/westerndesert/Easy/P013",
49
+ "westerndesert/westerndesert/Hard/P007",
50
+ ]
51
+
52
+
53
+ class TartanAir(RGBDDataset):
54
+
55
+ # scale depths to balance rot & trans
56
+ DEPTH_SCALE = 5.0
57
+
58
+ def __init__(self, mode='training', **kwargs):
59
+ self.mode = mode
60
+ self.n_frames = 2
61
+ super(TartanAir, self).__init__(name='TartanAir', **kwargs)
62
+
63
+ @staticmethod
64
+ def is_test_scene(scene):
65
+ # print(scene, any(x in scene for x in test_split))
66
+ return any(x in scene for x in test_split)
67
+
68
+ def _build_dataset(self):
69
+ from tqdm import tqdm
70
+ print("Building TartanAir dataset")
71
+
72
+ scene_info = {}
73
+ scenes = glob.glob(osp.join(self.root, '*/*/*/*'))
74
+ for scene in tqdm(sorted(scenes)):
75
+ images = sorted(glob.glob(osp.join(scene, 'image_left/*.png')))
76
+ depths = sorted(glob.glob(osp.join(scene, 'depth_left/*.npy')))
77
+
78
+ if len(images) != len(depths):
79
+ continue
80
+
81
+ poses = np.loadtxt(osp.join(scene, 'pose_left.txt'), delimiter=' ')
82
+ poses = poses[:, [1, 2, 0, 4, 5, 3, 6]]
83
+ poses[:,:3] /= TartanAir.DEPTH_SCALE
84
+ intrinsics = [TartanAir.calib_read()] * len(images)
85
+
86
+ # graph of co-visible frames based on flow
87
+ graph = self.build_frame_graph(poses, depths, intrinsics)
88
+
89
+ scene = '/'.join(scene.split('/'))
90
+ scene_info[scene] = {'images': images, 'depths': depths,
91
+ 'poses': poses, 'intrinsics': intrinsics, 'graph': graph}
92
+
93
+ return scene_info
94
+
95
+ @staticmethod
96
+ def calib_read():
97
+ return np.array([320.0, 320.0, 320.0, 240.0])
98
+
99
+ @staticmethod
100
+ def image_read(image_file):
101
+ return cv2.imread(image_file)
102
+
103
+ @staticmethod
104
+ def depth_read(depth_file):
105
+ depth = np.load(depth_file) / TartanAir.DEPTH_SCALE
106
+ depth[np.isnan(depth)] = 1.0  # note: 'depth == np.nan' is always False; np.isnan is needed to catch invalid values
107
+ depth[np.isinf(depth)] = 1.0
108
+ return depth
109
+
110
+
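For reference, a tiny illustration of the pose handling in _build_dataset above: TartanAir's pose_left.txt stores x y z qx qy qz qw in the simulator's NED-style axis order, and the [1, 2, 0, 4, 5, 3, 6] permutation plus the DEPTH_SCALE division put it into the x-right / y-down / z-forward order and scale used by the rest of the pipeline. The single pose row below is made up.

import numpy as np

DEPTH_SCALE = 5.0
pose_ned = np.array([[1.0, 2.0, 3.0, 0.1, 0.2, 0.3, 0.9]])   # one invented row of pose_left.txt
pose = pose_ned[:, [1, 2, 0, 4, 5, 3, 6]]                     # axis remap, as in _build_dataset
pose[:, :3] /= DEPTH_SCALE                                    # shrink translation to balance rot/trans
print(pose)   # [[0.4 0.6 0.2 0.2 0.3 0.1 0.9]]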
third-party/DPVO/dpvo/data_readers/tartan_test.txt ADDED
@@ -0,0 +1,32 @@
+ abandonedfactory/abandonedfactory/Easy/P011
+ abandonedfactory/abandonedfactory/Hard/P011
+ abandonedfactory_night/abandonedfactory_night/Easy/P013
+ abandonedfactory_night/abandonedfactory_night/Hard/P014
+ amusement/amusement/Easy/P008
+ amusement/amusement/Hard/P007
+ carwelding/carwelding/Easy/P007
+ endofworld/endofworld/Easy/P009
+ gascola/gascola/Easy/P008
+ gascola/gascola/Hard/P009
+ hospital/hospital/Easy/P036
+ hospital/hospital/Hard/P049
+ japanesealley/japanesealley/Easy/P007
+ japanesealley/japanesealley/Hard/P005
+ neighborhood/neighborhood/Easy/P021
+ neighborhood/neighborhood/Hard/P017
+ ocean/ocean/Easy/P013
+ ocean/ocean/Hard/P009
+ office2/office2/Easy/P011
+ office2/office2/Hard/P010
+ office/office/Hard/P007
+ oldtown/oldtown/Easy/P007
+ oldtown/oldtown/Hard/P008
+ seasidetown/seasidetown/Easy/P009
+ seasonsforest/seasonsforest/Easy/P011
+ seasonsforest/seasonsforest/Hard/P006
+ seasonsforest_winter/seasonsforest_winter/Easy/P009
+ seasonsforest_winter/seasonsforest_winter/Hard/P018
+ soulcity/soulcity/Easy/P012
+ soulcity/soulcity/Hard/P009
+ westerndesert/westerndesert/Easy/P013
+ westerndesert/westerndesert/Hard/P007
third-party/DPVO/dpvo/dpvo.py ADDED
@@ -0,0 +1,473 @@
1
+ import numpy as np
2
+ import torch
3
+ import torch.multiprocessing as mp
4
+ import torch.nn.functional as F
5
+
6
+ from . import altcorr, fastba, lietorch
7
+ from . import projective_ops as pops
8
+ from .lietorch import SE3
9
+ from .net import VONet
10
+ from .patchgraph import PatchGraph
11
+ from .utils import *
12
+
13
+ mp.set_start_method('spawn', True)
14
+
15
+
16
+ autocast = torch.cuda.amp.autocast
17
+ Id = SE3.Identity(1, device="cuda")
18
+
19
+
20
+ class DPVO:
21
+
22
+ def __init__(self, cfg, network, ht=480, wd=640, viz=False):
23
+ self.cfg = cfg
24
+ self.load_weights(network)
25
+ self.is_initialized = False
26
+ self.enable_timing = False
27
+ torch.set_num_threads(2)
28
+
29
+ self.M = self.cfg.PATCHES_PER_FRAME
30
+ self.N = self.cfg.BUFFER_SIZE
31
+
32
+ self.ht = ht # image height
33
+ self.wd = wd # image width
34
+
35
+ DIM = self.DIM
36
+ RES = self.RES
37
+
38
+ ### state attributes ###
39
+ self.tlist = []
40
+ self.counter = 0
41
+
42
+ # keep track of global-BA calls
43
+ self.ran_global_ba = np.zeros(100000, dtype=bool)
44
+
45
+ ht = ht // RES
46
+ wd = wd // RES
47
+
48
+ # dummy image for visualization
49
+ self.image_ = torch.zeros(self.ht, self.wd, 3, dtype=torch.uint8, device="cpu")
50
+
51
+ ### network attributes ###
52
+ if self.cfg.MIXED_PRECISION:
53
+ self.kwargs = kwargs = {"device": "cuda", "dtype": torch.half}
54
+ else:
55
+ self.kwargs = kwargs = {"device": "cuda", "dtype": torch.float}
56
+
57
+ ### frame memory size ###
58
+ self.pmem = self.mem = 36 # 32 was too small given default settings
59
+ if self.cfg.LOOP_CLOSURE:
60
+ self.last_global_ba = -1000 # keep track of time since last global opt
61
+ self.pmem = self.cfg.MAX_EDGE_AGE # patch memory
62
+
63
+ self.imap_ = torch.zeros(self.pmem, self.M, DIM, **kwargs)
64
+ self.gmap_ = torch.zeros(self.pmem, self.M, 128, self.P, self.P, **kwargs)
65
+
66
+ self.pg = PatchGraph(self.cfg, self.P, self.DIM, self.pmem, **kwargs)
67
+
68
+ # classic backend
69
+ if self.cfg.CLASSIC_LOOP_CLOSURE:
70
+ self.load_long_term_loop_closure()
71
+
72
+ self.fmap1_ = torch.zeros(1, self.mem, 128, ht // 1, wd // 1, **kwargs)
73
+ self.fmap2_ = torch.zeros(1, self.mem, 128, ht // 4, wd // 4, **kwargs)
74
+
75
+ # feature pyramid
76
+ self.pyramid = (self.fmap1_, self.fmap2_)
77
+
78
+ self.viewer = None
79
+ if viz:
80
+ self.start_viewer()
81
+
82
+ def load_long_term_loop_closure(self):
83
+ try:
84
+ from .loop_closure.long_term import LongTermLoopClosure
85
+ self.long_term_lc = LongTermLoopClosure(self.cfg, self.pg)
86
+ except ModuleNotFoundError as e:
87
+ self.cfg.CLASSIC_LOOP_CLOSURE = False
88
+ print(f"WARNING: {e}")
89
+
90
+ def load_weights(self, network):
91
+ # load network from checkpoint file
92
+ if isinstance(network, str):
93
+ from collections import OrderedDict
94
+ state_dict = torch.load(network)
95
+ new_state_dict = OrderedDict()
96
+ for k, v in state_dict.items():
97
+ if "update.lmbda" not in k:
98
+ new_state_dict[k.replace('module.', '')] = v
99
+
100
+ self.network = VONet()
101
+ self.network.load_state_dict(new_state_dict)
102
+
103
+ else:
104
+ self.network = network
105
+
106
+ # steal network attributes
107
+ self.DIM = self.network.DIM
108
+ self.RES = self.network.RES
109
+ self.P = self.network.P
110
+
111
+ self.network.cuda()
112
+ self.network.eval()
113
+
114
+ def start_viewer(self):
115
+ from dpviewer import Viewer
116
+
117
+ intrinsics_ = torch.zeros(1, 4, dtype=torch.float32, device="cuda")
118
+
119
+ self.viewer = Viewer(
120
+ self.image_,
121
+ self.pg.poses_,
122
+ self.pg.points_,
123
+ self.pg.colors_,
124
+ intrinsics_)
125
+
126
+ @property
127
+ def poses(self):
128
+ return self.pg.poses_.view(1, self.N, 7)
129
+
130
+ @property
131
+ def patches(self):
132
+ return self.pg.patches_.view(1, self.N*self.M, 3, 3, 3)
133
+
134
+ @property
135
+ def intrinsics(self):
136
+ return self.pg.intrinsics_.view(1, self.N, 4)
137
+
138
+ @property
139
+ def ix(self):
140
+ return self.pg.index_.view(-1)
141
+
142
+ @property
143
+ def imap(self):
144
+ return self.imap_.view(1, self.pmem * self.M, self.DIM)
145
+
146
+ @property
147
+ def gmap(self):
148
+ return self.gmap_.view(1, self.pmem * self.M, 128, 3, 3)
149
+
150
+ @property
151
+ def n(self):
152
+ return self.pg.n
153
+
154
+ @n.setter
155
+ def n(self, val):
156
+ self.pg.n = val
157
+
158
+ @property
159
+ def m(self):
160
+ return self.pg.m
161
+
162
+ @m.setter
163
+ def m(self, val):
164
+ self.pg.m = val
165
+
166
+ def get_pose(self, t):
167
+ if t in self.traj:
168
+ return SE3(self.traj[t])
169
+
170
+ t0, dP = self.pg.delta[t]
171
+ return dP * self.get_pose(t0)
172
+
173
+ def terminate(self):
174
+
175
+ if self.cfg.CLASSIC_LOOP_CLOSURE:
176
+ self.long_term_lc.terminate(self.n)
177
+
178
+ if self.cfg.LOOP_CLOSURE:
179
+ self.append_factors(*self.pg.edges_loop())
180
+
181
+ for _ in range(12):
182
+ self.ran_global_ba[self.n] = False
183
+ self.update()
184
+
185
+ """ interpolate missing poses """
186
+ self.traj = {}
187
+ for i in range(self.n):
188
+ self.traj[self.pg.tstamps_[i]] = self.pg.poses_[i]
189
+
190
+ poses = [self.get_pose(t) for t in range(self.counter)]
191
+ poses = lietorch.stack(poses, dim=0)
192
+ poses = poses.inv().data.cpu().numpy()
193
+ tstamps = np.array(self.tlist, dtype=np.float64)
194
+ if self.viewer is not None:
195
+ self.viewer.join()
196
+
197
+ # Poses: x y z qx qy qz qw
198
+ return poses, tstamps
199
+
200
+ def corr(self, coords, indicies=None):
201
+ """ local correlation volume """
202
+ ii, jj = indicies if indicies is not None else (self.pg.kk, self.pg.jj)
203
+ ii1 = ii % (self.M * self.pmem)
204
+ jj1 = jj % (self.mem)
205
+ corr1 = altcorr.corr(self.gmap, self.pyramid[0], coords / 1, ii1, jj1, 3)
206
+ corr2 = altcorr.corr(self.gmap, self.pyramid[1], coords / 4, ii1, jj1, 3)
207
+ return torch.stack([corr1, corr2], -1).view(1, len(ii), -1)
208
+
209
+ def reproject(self, indicies=None):
210
+ """ reproject patch k from i -> j """
211
+ (ii, jj, kk) = indicies if indicies is not None else (self.pg.ii, self.pg.jj, self.pg.kk)
212
+ coords = pops.transform(SE3(self.poses), self.patches, self.intrinsics, ii, jj, kk)
213
+ return coords.permute(0, 1, 4, 2, 3).contiguous()
214
+
215
+ def append_factors(self, ii, jj):
216
+ self.pg.jj = torch.cat([self.pg.jj, jj])
217
+ self.pg.kk = torch.cat([self.pg.kk, ii])
218
+ self.pg.ii = torch.cat([self.pg.ii, self.ix[ii]])
219
+
220
+ net = torch.zeros(1, len(ii), self.DIM, **self.kwargs)
221
+ self.pg.net = torch.cat([self.pg.net, net], dim=1)
222
+
223
+ def remove_factors(self, m, store: bool):
224
+ assert self.pg.ii.numel() == self.pg.weight.shape[1]
225
+ if store:
226
+ self.pg.ii_inac = torch.cat((self.pg.ii_inac, self.pg.ii[m]))
227
+ self.pg.jj_inac = torch.cat((self.pg.jj_inac, self.pg.jj[m]))
228
+ self.pg.kk_inac = torch.cat((self.pg.kk_inac, self.pg.kk[m]))
229
+ self.pg.weight_inac = torch.cat((self.pg.weight_inac, self.pg.weight[:,m]), dim=1)
230
+ self.pg.target_inac = torch.cat((self.pg.target_inac, self.pg.target[:,m]), dim=1)
231
+ self.pg.weight = self.pg.weight[:,~m]
232
+ self.pg.target = self.pg.target[:,~m]
233
+
234
+ self.pg.ii = self.pg.ii[~m]
235
+ self.pg.jj = self.pg.jj[~m]
236
+ self.pg.kk = self.pg.kk[~m]
237
+ self.pg.net = self.pg.net[:,~m]
238
+ assert self.pg.ii.numel() == self.pg.weight.shape[1]
239
+
240
+ def motion_probe(self):
241
+ """ kinda hacky way to ensure enough motion for initialization """
242
+ kk = torch.arange(self.m-self.M, self.m, device="cuda")
243
+ jj = self.n * torch.ones_like(kk)
244
+ ii = self.ix[kk]
245
+
246
+ net = torch.zeros(1, len(ii), self.DIM, **self.kwargs)
247
+ coords = self.reproject(indicies=(ii, jj, kk))
248
+
249
+ with autocast(enabled=self.cfg.MIXED_PRECISION):
250
+ corr = self.corr(coords, indicies=(kk, jj))
251
+ ctx = self.imap[:,kk % (self.M * self.pmem)]
252
+ net, (delta, weight, _) = \
253
+ self.network.update(net, ctx, corr, None, ii, jj, kk)
254
+
255
+ return torch.quantile(delta.norm(dim=-1).float(), 0.5)
256
+
257
+ def motionmag(self, i, j):
258
+ k = (self.pg.ii == i) & (self.pg.jj == j)
259
+ ii = self.pg.ii[k]
260
+ jj = self.pg.jj[k]
261
+ kk = self.pg.kk[k]
262
+
263
+ flow, _ = pops.flow_mag(SE3(self.poses), self.patches, self.intrinsics, ii, jj, kk, beta=0.5)
264
+ return flow.mean().item()
265
+
266
+ def keyframe(self):
267
+
268
+ i = self.n - self.cfg.KEYFRAME_INDEX - 1
269
+ j = self.n - self.cfg.KEYFRAME_INDEX + 1
270
+ m = self.motionmag(i, j) + self.motionmag(j, i)
271
+
272
+ if m / 2 < self.cfg.KEYFRAME_THRESH:
273
+ k = self.n - self.cfg.KEYFRAME_INDEX
274
+ t0 = self.pg.tstamps_[k-1]
275
+ t1 = self.pg.tstamps_[k]
276
+
277
+ dP = SE3(self.pg.poses_[k]) * SE3(self.pg.poses_[k-1]).inv()
278
+ self.pg.delta[t1] = (t0, dP)
279
+
280
+ to_remove = (self.pg.ii == k) | (self.pg.jj == k)
281
+ self.remove_factors(to_remove, store=False)
282
+
283
+ self.pg.kk[self.pg.ii > k] -= self.M
284
+ self.pg.ii[self.pg.ii > k] -= 1
285
+ self.pg.jj[self.pg.jj > k] -= 1
286
+
287
+ for i in range(k, self.n-1):
288
+ self.pg.tstamps_[i] = self.pg.tstamps_[i+1]
289
+ self.pg.colors_[i] = self.pg.colors_[i+1]
290
+ self.pg.poses_[i] = self.pg.poses_[i+1]
291
+ self.pg.patches_[i] = self.pg.patches_[i+1]
292
+ self.pg.intrinsics_[i] = self.pg.intrinsics_[i+1]
293
+
294
+ self.imap_[i % self.pmem] = self.imap_[(i+1) % self.pmem]
295
+ self.gmap_[i % self.pmem] = self.gmap_[(i+1) % self.pmem]
296
+ self.fmap1_[0,i%self.mem] = self.fmap1_[0,(i+1)%self.mem]
297
+ self.fmap2_[0,i%self.mem] = self.fmap2_[0,(i+1)%self.mem]
298
+
299
+ self.n -= 1
300
+ self.m-= self.M
301
+
302
+ if self.cfg.CLASSIC_LOOP_CLOSURE:
303
+ self.long_term_lc.keyframe(k)
304
+
305
+ to_remove = self.ix[self.pg.kk] < self.n - self.cfg.REMOVAL_WINDOW # Remove edges falling outside the optimization window
306
+ if self.cfg.LOOP_CLOSURE:
307
+ # ...unless they are being used for loop closure
308
+ lc_edges = ((self.pg.jj - self.pg.ii) > 30) & (self.pg.jj > (self.n - self.cfg.OPTIMIZATION_WINDOW))
309
+ to_remove = to_remove & ~lc_edges
310
+ self.remove_factors(to_remove, store=True)
311
+
312
+ def __run_global_BA(self):
313
+ """ Global bundle adjustment
314
+ Includes both active and inactive edges """
315
+ full_target = torch.cat((self.pg.target_inac, self.pg.target), dim=1)
316
+ full_weight = torch.cat((self.pg.weight_inac, self.pg.weight), dim=1)
317
+ full_ii = torch.cat((self.pg.ii_inac, self.pg.ii))
318
+ full_jj = torch.cat((self.pg.jj_inac, self.pg.jj))
319
+ full_kk = torch.cat((self.pg.kk_inac, self.pg.kk))
320
+
321
+ self.pg.normalize()
322
+ lmbda = torch.as_tensor([1e-4], device="cuda")
323
+ t0 = self.pg.ii.min().item()
324
+ fastba.BA(self.poses, self.patches, self.intrinsics,
325
+ full_target, full_weight, lmbda, full_ii, full_jj, full_kk, t0, self.n, M=self.M, iterations=2, eff_impl=True)
326
+ self.ran_global_ba[self.n] = True
327
+
328
+ def update(self):
329
+ with Timer("other", enabled=self.enable_timing):
330
+ coords = self.reproject()
331
+
332
+ with autocast(enabled=True):
333
+ corr = self.corr(coords)
334
+ ctx = self.imap[:, self.pg.kk % (self.M * self.pmem)]
335
+ self.pg.net, (delta, weight, _) = \
336
+ self.network.update(self.pg.net, ctx, corr, None, self.pg.ii, self.pg.jj, self.pg.kk)
337
+
338
+ lmbda = torch.as_tensor([1e-4], device="cuda")
339
+ weight = weight.float()
340
+ target = coords[...,self.P//2,self.P//2] + delta.float()
341
+
342
+ self.pg.target = target
343
+ self.pg.weight = weight
344
+
345
+ with Timer("BA", enabled=self.enable_timing):
346
+ try:
347
+ # run global bundle adjustment if there exist long-range edges
348
+ if (self.pg.ii < self.n - self.cfg.REMOVAL_WINDOW - 1).any() and not self.ran_global_ba[self.n]:
349
+ self.__run_global_BA()
350
+ else:
351
+ t0 = self.n - self.cfg.OPTIMIZATION_WINDOW if self.is_initialized else 1
352
+ t0 = max(t0, 1)
353
+ fastba.BA(self.poses, self.patches, self.intrinsics,
354
+ target, weight, lmbda, self.pg.ii, self.pg.jj, self.pg.kk, t0, self.n, M=self.M, iterations=2, eff_impl=False)
355
+ except:
356
+ print("Warning BA failed...")
357
+
358
+ points = pops.point_cloud(SE3(self.poses), self.patches[:, :self.m], self.intrinsics, self.ix[:self.m])
359
+ points = (points[...,1,1,:3] / points[...,1,1,3:]).reshape(-1, 3)
360
+ self.pg.points_[:len(points)] = points[:]
361
+
362
+ def __edges_forw(self):
363
+ r=self.cfg.PATCH_LIFETIME
364
+ t0 = self.M * max((self.n - r), 0)
365
+ t1 = self.M * max((self.n - 1), 0)
366
+ return flatmeshgrid(
367
+ torch.arange(t0, t1, device="cuda"),
368
+ torch.arange(self.n-1, self.n, device="cuda"), indexing='ij')
369
+
370
+ def __edges_back(self):
371
+ r=self.cfg.PATCH_LIFETIME
372
+ t0 = self.M * max((self.n - 1), 0)
373
+ t1 = self.M * max((self.n - 0), 0)
374
+ return flatmeshgrid(torch.arange(t0, t1, device="cuda"),
375
+ torch.arange(max(self.n-r, 0), self.n, device="cuda"), indexing='ij')
376
+
377
+ def __call__(self, tstamp, image, intrinsics):
378
+ """ track new frame """
379
+
380
+ if self.cfg.CLASSIC_LOOP_CLOSURE:
381
+ self.long_term_lc(image, self.n)
382
+
383
+ if (self.n+1) >= self.N:
384
+ raise Exception(f'The buffer size is too small. You can increase it using "--opts BUFFER_SIZE={self.N*2}"')
385
+
386
+ if self.viewer is not None:
387
+ self.viewer.update_image(image.contiguous())
388
+
389
+ image = 2 * (image[None,None] / 255.0) - 0.5
390
+
391
+ with autocast(enabled=self.cfg.MIXED_PRECISION):
392
+ fmap, gmap, imap, patches, _, clr = \
393
+ self.network.patchify(image,
394
+ patches_per_image=self.cfg.PATCHES_PER_FRAME,
395
+ centroid_sel_strat=self.cfg.CENTROID_SEL_STRAT,
396
+ return_color=True)
397
+
398
+ ### update state attributes ###
399
+ self.tlist.append(tstamp)
400
+ self.pg.tstamps_[self.n] = self.counter
401
+ self.pg.intrinsics_[self.n] = intrinsics / self.RES
402
+
403
+ # color info for visualization
404
+ clr = (clr[0,:,[2,1,0]] + 0.5) * (255.0 / 2)
405
+ self.pg.colors_[self.n] = clr.to(torch.uint8)
406
+
407
+ self.pg.index_[self.n + 1] = self.n + 1
408
+ self.pg.index_map_[self.n + 1] = self.m + self.M
409
+
410
+ if self.n > 1:
411
+ if self.cfg.MOTION_MODEL == 'DAMPED_LINEAR':
412
+ P1 = SE3(self.pg.poses_[self.n-1])
413
+ P2 = SE3(self.pg.poses_[self.n-2])
414
+
415
+ # To deal with varying camera hz
416
+ *_, a,b,c = [1]*3 + self.tlist
417
+ fac = (c-b) / (b-a)
418
+
419
+ xi = self.cfg.MOTION_DAMPING * fac * (P1 * P2.inv()).log()
420
+ tvec_qvec = (SE3.exp(xi) * P1).data
421
+ self.pg.poses_[self.n] = tvec_qvec
422
+ else:
423
+ tvec_qvec = self.poses[self.n-1]
424
+ self.pg.poses_[self.n] = tvec_qvec
425
+
426
+ # TODO better depth initialization
427
+ patches[:,:,2] = torch.rand_like(patches[:,:,2,0,0,None,None])
428
+ if self.is_initialized:
429
+ s = torch.median(self.pg.patches_[self.n-3:self.n,:,2])
430
+ patches[:,:,2] = s
431
+
432
+ self.pg.patches_[self.n] = patches
433
+
434
+ ### update network attributes ###
435
+ self.imap_[self.n % self.pmem] = imap.squeeze()
436
+ self.gmap_[self.n % self.pmem] = gmap.squeeze()
437
+ self.fmap1_[:, self.n % self.mem] = F.avg_pool2d(fmap[0], 1, 1)
438
+ self.fmap2_[:, self.n % self.mem] = F.avg_pool2d(fmap[0], 4, 4)
439
+
440
+ self.counter += 1
441
+ if self.n > 0 and not self.is_initialized:
442
+ if self.motion_probe() < 2.0:
443
+ self.pg.delta[self.counter - 1] = (self.counter - 2, Id[0])
444
+ return
445
+
446
+ self.n += 1
447
+ self.m += self.M
448
+
449
+ if self.cfg.LOOP_CLOSURE:
450
+ if self.n - self.last_global_ba >= self.cfg.GLOBAL_OPT_FREQ:
451
+ """ Add loop closure factors """
452
+ lii, ljj = self.pg.edges_loop()
453
+ if lii.numel() > 0:
454
+ self.last_global_ba = self.n
455
+ self.append_factors(lii, ljj)
456
+
457
+ # Add forward and backward factors
458
+ self.append_factors(*self.__edges_forw())
459
+ self.append_factors(*self.__edges_back())
460
+
461
+ if self.n == 8 and not self.is_initialized:
462
+ self.is_initialized = True
463
+
464
+ for itr in range(12):
465
+ self.update()
466
+
467
+ elif self.is_initialized:
468
+ self.update()
469
+ self.keyframe()
470
+
471
+ if self.cfg.CLASSIC_LOOP_CLOSURE:
472
+ self.long_term_lc.attempt_loop_closure(self.n)
473
+ self.long_term_lc.lc_callback()
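A minimal sketch of how the DPVO class above is driven frame by frame. The config path, checkpoint name, intrinsics, and the random frames are stand-ins (a real caller would stream video and pass calibrated intrinsics), and running it requires the compiled CUDA extensions plus a trained checkpoint.

import torch
from dpvo.config import cfg          # default config node shipped with the package
from dpvo.dpvo import DPVO

cfg.merge_from_file('config/default.yaml')                     # assumed config location
intrinsics = torch.tensor([320.0, 320.0, 320.0, 240.0]).cuda()

slam = DPVO(cfg, 'dpvo.pth', ht=480, wd=640, viz=False)        # assumed checkpoint path
for t in range(100):
    # stand-in frame; replace with frames from a video or image stream
    image = torch.randint(0, 256, (3, 480, 640), dtype=torch.uint8).cuda()
    with torch.no_grad():
        slam(t, image, intrinsics)                             # track one frame

poses, tstamps = slam.terminate()    # poses: x y z qx qy qz qw, one row per input frame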
third-party/DPVO/dpvo/extractor.py ADDED
@@ -0,0 +1,264 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+
6
+ class ResidualBlock(nn.Module):
7
+ def __init__(self, in_planes, planes, norm_fn='group', stride=1):
8
+ super(ResidualBlock, self).__init__()
9
+
10
+ self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, stride=stride)
11
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
12
+ self.relu = nn.ReLU(inplace=True)
13
+
14
+ num_groups = planes // 8
15
+
16
+ if norm_fn == 'group':
17
+ self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
18
+ self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
19
+ if not stride == 1:
20
+ self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
21
+
22
+ elif norm_fn == 'batch':
23
+ self.norm1 = nn.BatchNorm2d(planes)
24
+ self.norm2 = nn.BatchNorm2d(planes)
25
+ if not stride == 1:
26
+ self.norm3 = nn.BatchNorm2d(planes)
27
+
28
+ elif norm_fn == 'instance':
29
+ self.norm1 = nn.InstanceNorm2d(planes)
30
+ self.norm2 = nn.InstanceNorm2d(planes)
31
+ if not stride == 1:
32
+ self.norm3 = nn.InstanceNorm2d(planes)
33
+
34
+ elif norm_fn == 'none':
35
+ self.norm1 = nn.Sequential()
36
+ self.norm2 = nn.Sequential()
37
+ if not stride == 1:
38
+ self.norm3 = nn.Sequential()
39
+
40
+ if stride == 1:
41
+ self.downsample = None
42
+
43
+ else:
44
+ self.downsample = nn.Sequential(
45
+ nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm3)
46
+
47
+ def forward(self, x):
48
+ y = x
49
+ y = self.relu(self.norm1(self.conv1(y)))
50
+ y = self.relu(self.norm2(self.conv2(y)))
51
+
52
+ if self.downsample is not None:
53
+ x = self.downsample(x)
54
+
55
+ return self.relu(x+y)
56
+
57
+
58
+ class BottleneckBlock(nn.Module):
59
+ def __init__(self, in_planes, planes, norm_fn='group', stride=1):
60
+ super(BottleneckBlock, self).__init__()
61
+
62
+ self.conv1 = nn.Conv2d(in_planes, planes//4, kernel_size=1, padding=0)
63
+ self.conv2 = nn.Conv2d(planes//4, planes//4, kernel_size=3, padding=1, stride=stride)
64
+ self.conv3 = nn.Conv2d(planes//4, planes, kernel_size=1, padding=0)
65
+ self.relu = nn.ReLU(inplace=True)
66
+
67
+ num_groups = planes // 8
68
+
69
+ if norm_fn == 'group':
70
+ self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4)
71
+ self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4)
72
+ self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
73
+ if not stride == 1:
74
+ self.norm4 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
75
+
76
+ elif norm_fn == 'batch':
77
+ self.norm1 = nn.BatchNorm2d(planes//4)
78
+ self.norm2 = nn.BatchNorm2d(planes//4)
79
+ self.norm3 = nn.BatchNorm2d(planes)
80
+ if not stride == 1:
81
+ self.norm4 = nn.BatchNorm2d(planes)
82
+
83
+ elif norm_fn == 'instance':
84
+ self.norm1 = nn.InstanceNorm2d(planes//4)
85
+ self.norm2 = nn.InstanceNorm2d(planes//4)
86
+ self.norm3 = nn.InstanceNorm2d(planes)
87
+ if not stride == 1:
88
+ self.norm4 = nn.InstanceNorm2d(planes)
89
+
90
+ elif norm_fn == 'none':
91
+ self.norm1 = nn.Sequential()
92
+ self.norm2 = nn.Sequential()
93
+ self.norm3 = nn.Sequential()
94
+ if not stride == 1:
95
+ self.norm4 = nn.Sequential()
96
+
97
+ if stride == 1:
98
+ self.downsample = None
99
+
100
+ else:
101
+ self.downsample = nn.Sequential(
102
+ nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm4)
103
+
104
+ def forward(self, x):
105
+ y = x
106
+ y = self.relu(self.norm1(self.conv1(y)))
107
+ y = self.relu(self.norm2(self.conv2(y)))
108
+ y = self.relu(self.norm3(self.conv3(y)))
109
+
110
+ if self.downsample is not None:
111
+ x = self.downsample(x)
112
+
113
+ return self.relu(x+y)
114
+
115
+ DIM=32
116
+
117
+ class BasicEncoder(nn.Module):
118
+ def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0, multidim=False):
119
+ super(BasicEncoder, self).__init__()
120
+ self.norm_fn = norm_fn
121
+ self.multidim = multidim
122
+
123
+ if self.norm_fn == 'group':
124
+ self.norm1 = nn.GroupNorm(num_groups=8, num_channels=DIM)
125
+
126
+ elif self.norm_fn == 'batch':
127
+ self.norm1 = nn.BatchNorm2d(DIM)
128
+
129
+ elif self.norm_fn == 'instance':
130
+ self.norm1 = nn.InstanceNorm2d(DIM)
131
+
132
+ elif self.norm_fn == 'none':
133
+ self.norm1 = nn.Sequential()
134
+
135
+ self.conv1 = nn.Conv2d(3, DIM, kernel_size=7, stride=2, padding=3)
136
+ self.relu1 = nn.ReLU(inplace=True)
137
+
138
+ self.in_planes = DIM
139
+ self.layer1 = self._make_layer(DIM, stride=1)
140
+ self.layer2 = self._make_layer(2*DIM, stride=2)
141
+ self.layer3 = self._make_layer(4*DIM, stride=2)
142
+
143
+ # output convolution
144
+ self.conv2 = nn.Conv2d(4*DIM, output_dim, kernel_size=1)
145
+
146
+ if self.multidim:
147
+ self.layer4 = self._make_layer(256, stride=2)
148
+ self.layer5 = self._make_layer(512, stride=2)
149
+
150
+ self.in_planes = 256
151
+ self.layer6 = self._make_layer(256, stride=1)
152
+
153
+ self.in_planes = 128
154
+ self.layer7 = self._make_layer(128, stride=1)
155
+
156
+ self.up1 = nn.Conv2d(512, 256, 1)
157
+ self.up2 = nn.Conv2d(256, 128, 1)
158
+ self.conv3 = nn.Conv2d(128, output_dim, kernel_size=1)
159
+
160
+ if dropout > 0:
161
+ self.dropout = nn.Dropout2d(p=dropout)
162
+ else:
163
+ self.dropout = None
164
+
165
+ for m in self.modules():
166
+ if isinstance(m, nn.Conv2d):
167
+ nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
168
+ elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)):
169
+ if m.weight is not None:
170
+ nn.init.constant_(m.weight, 1)
171
+ if m.bias is not None:
172
+ nn.init.constant_(m.bias, 0)
173
+
174
+ def _make_layer(self, dim, stride=1):
175
+ layer1 = ResidualBlock(self.in_planes, dim, self.norm_fn, stride=stride)
176
+ layer2 = ResidualBlock(dim, dim, self.norm_fn, stride=1)
177
+ layers = (layer1, layer2)
178
+
179
+ self.in_planes = dim
180
+ return nn.Sequential(*layers)
181
+
182
+ def forward(self, x):
183
+ b, n, c1, h1, w1 = x.shape
184
+ x = x.view(b*n, c1, h1, w1)
185
+
186
+ x = self.conv1(x)
187
+ x = self.norm1(x)
188
+ x = self.relu1(x)
189
+
190
+ x = self.layer1(x)
191
+ x = self.layer2(x)
192
+ x = self.layer3(x)
193
+
194
+ x = self.conv2(x)
195
+
196
+ _, c2, h2, w2 = x.shape
197
+ return x.view(b, n, c2, h2, w2)
198
+
199
+
200
+ class BasicEncoder4(nn.Module):
201
+ def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0, multidim=False):
202
+ super(BasicEncoder4, self).__init__()
203
+ self.norm_fn = norm_fn
204
+ self.multidim = multidim
205
+
206
+ if self.norm_fn == 'group':
207
+ self.norm1 = nn.GroupNorm(num_groups=8, num_channels=DIM)
208
+
209
+ elif self.norm_fn == 'batch':
210
+ self.norm1 = nn.BatchNorm2d(DIM)
211
+
212
+ elif self.norm_fn == 'instance':
213
+ self.norm1 = nn.InstanceNorm2d(DIM)
214
+
215
+ elif self.norm_fn == 'none':
216
+ self.norm1 = nn.Sequential()
217
+
218
+ self.conv1 = nn.Conv2d(3, DIM, kernel_size=7, stride=2, padding=3)
219
+ self.relu1 = nn.ReLU(inplace=True)
220
+
221
+ self.in_planes = DIM
222
+ self.layer1 = self._make_layer(DIM, stride=1)
223
+ self.layer2 = self._make_layer(2*DIM, stride=2)
224
+
225
+ # output convolution
226
+ self.conv2 = nn.Conv2d(2*DIM, output_dim, kernel_size=1)
227
+
228
+ if dropout > 0:
229
+ self.dropout = nn.Dropout2d(p=dropout)
230
+ else:
231
+ self.dropout = None
232
+
233
+ for m in self.modules():
234
+ if isinstance(m, nn.Conv2d):
235
+ nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
236
+ elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)):
237
+ if m.weight is not None:
238
+ nn.init.constant_(m.weight, 1)
239
+ if m.bias is not None:
240
+ nn.init.constant_(m.bias, 0)
241
+
242
+ def _make_layer(self, dim, stride=1):
243
+ layer1 = ResidualBlock(self.in_planes, dim, self.norm_fn, stride=stride)
244
+ layer2 = ResidualBlock(dim, dim, self.norm_fn, stride=1)
245
+ layers = (layer1, layer2)
246
+
247
+ self.in_planes = dim
248
+ return nn.Sequential(*layers)
249
+
250
+ def forward(self, x):
251
+ b, n, c1, h1, w1 = x.shape
252
+ x = x.view(b*n, c1, h1, w1)
253
+
254
+ x = self.conv1(x)
255
+ x = self.norm1(x)
256
+ x = self.relu1(x)
257
+
258
+ x = self.layer1(x)
259
+ x = self.layer2(x)
260
+
261
+ x = self.conv2(x)
262
+
263
+ _, c2, h2, w2 = x.shape
264
+ return x.view(b, n, c2, h2, w2)
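As a quick orientation for the two encoders above: BasicEncoder downsamples by 8 (three stride-2 stages) and BasicEncoder4 by 4 (two stride-2 stages), and both take a batched sequence of RGB frames shaped (batch, frames, 3, H, W). A minimal usage sketch follows; the output dims, norm choices and image size are made-up illustration values, not configuration taken from this diff.

import torch
from dpvo.extractor import BasicEncoder, BasicEncoder4   # assumed module path

fnet = BasicEncoder4(output_dim=128, norm_fn='instance')  # hypothetical matching-feature net
inet = BasicEncoder(output_dim=384, norm_fn='none')       # hypothetical context net

images = torch.randn(1, 15, 3, 480, 640)                  # (batch, frames, rgb, H, W)
fmap = fnet(images)   # -> (1, 15, 128, 120, 160): 1/4 resolution
imap = inet(images)   # -> (1, 15, 384,  60,  80): 1/8 resolution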
third-party/DPVO/dpvo/fastba/__init__.py ADDED
@@ -0,0 +1 @@
+ from .ba import BA, neighbors, reproject
third-party/DPVO/dpvo/fastba/ba.cpp ADDED
@@ -0,0 +1,189 @@
1
+ #include <torch/extension.h>
2
+ #include <vector>
3
+ #include <unordered_map>
4
+ #include <algorithm>
5
+ #include <iostream>
6
+ #include <Eigen/Core>
7
+ #include <Eigen/Sparse>
8
+
9
+
10
+ std::vector<torch::Tensor> cuda_ba(
11
+ torch::Tensor poses,
12
+ torch::Tensor patches,
13
+ torch::Tensor intrinsics,
14
+ torch::Tensor target,
15
+ torch::Tensor weight,
16
+ torch::Tensor lmbda,
17
+ torch::Tensor ii,
18
+ torch::Tensor jj,
19
+ torch::Tensor kk,
20
+ const int PPF,
21
+ int t0, int t1, int iterations, bool eff_impl);
22
+
23
+
24
+ torch::Tensor cuda_reproject(
25
+ torch::Tensor poses,
26
+ torch::Tensor patches,
27
+ torch::Tensor intrinsics,
28
+ torch::Tensor ii,
29
+ torch::Tensor jj,
30
+ torch::Tensor kk);
31
+
32
+ std::vector<torch::Tensor> ba(
33
+ torch::Tensor poses,
34
+ torch::Tensor patches,
35
+ torch::Tensor intrinsics,
36
+ torch::Tensor target,
37
+ torch::Tensor weight,
38
+ torch::Tensor lmbda,
39
+ torch::Tensor ii,
40
+ torch::Tensor jj,
41
+ torch::Tensor kk,
42
+ int PPF,
43
+ int t0, int t1, int iterations, bool eff_impl) {
44
+ return cuda_ba(poses, patches, intrinsics, target, weight, lmbda, ii, jj, kk, PPF, t0, t1, iterations, eff_impl);
45
+ }
46
+
47
+
48
+ torch::Tensor reproject(
49
+ torch::Tensor poses,
50
+ torch::Tensor patches,
51
+ torch::Tensor intrinsics,
52
+ torch::Tensor ii,
53
+ torch::Tensor jj,
54
+ torch::Tensor kk) {
55
+ return cuda_reproject(poses, patches, intrinsics, ii, jj, kk);
56
+ }
57
+
58
+
59
+ std::vector<torch::Tensor> neighbors(torch::Tensor ii, torch::Tensor jj)
60
+ {
61
+
62
+ auto tup = torch::_unique(ii, true, true);
63
+ torch::Tensor uniq = std::get<0>(tup).to(torch::kCPU);
64
+ torch::Tensor perm = std::get<1>(tup).to(torch::kCPU);
65
+
66
+ jj = jj.to(torch::kCPU);
67
+ auto jj_accessor = jj.accessor<long,1>();
68
+
69
+ auto perm_accessor = perm.accessor<long,1>();
70
+ std::vector<std::vector<long>> index(uniq.size(0));
71
+ for (int i=0; i < ii.size(0); i++) {
72
+ index[perm_accessor[i]].push_back(i);
73
+ }
74
+
75
+ auto opts = torch::TensorOptions().dtype(torch::kInt64);
76
+ torch::Tensor ix = torch::empty({ii.size(0)}, opts);
77
+ torch::Tensor jx = torch::empty({ii.size(0)}, opts);
78
+
79
+ auto ix_accessor = ix.accessor<long,1>();
80
+ auto jx_accessor = jx.accessor<long,1>();
81
+
82
+ for (int i=0; i<uniq.size(0); i++) {
83
+ std::vector<long>& idx = index[i];
84
+ std::stable_sort(idx.begin(), idx.end(),
85
+ [&jj_accessor](size_t i, size_t j) {return jj_accessor[i] < jj_accessor[j];});
86
+
87
+ for (int i=0; i < idx.size(); i++) {
88
+ ix_accessor[idx[i]] = (i > 0) ? idx[i-1] : -1;
89
+ jx_accessor[idx[i]] = (i < idx.size() - 1) ? idx[i+1] : -1;
90
+ }
91
+ }
92
+
93
+ ix = ix.to(torch::kCUDA);
94
+ jx = jx.to(torch::kCUDA);
95
+
96
+ return {ix, jx};
97
+ }
98
+
99
+ typedef Eigen::SparseMatrix<double> SpMat;
100
+ typedef Eigen::Triplet<double> T;
101
+
102
+ Eigen::VectorXd solve(const SpMat &A, const Eigen::VectorXd &b, int freen){
103
+
104
+ if (freen < 0){
105
+ const Eigen::SimplicialCholesky<SpMat> chol(A);
106
+ return chol.solve(b); // n x 1
107
+ }
108
+
109
+ const SpMat A_sub = A.topLeftCorner(freen, freen);
110
+ const Eigen::VectorXd b_sub = b.topRows(freen);
111
+ const Eigen::VectorXd delta = solve(A_sub, b_sub, -7);
112
+
113
+ Eigen::VectorXd delta2(b.rows());
114
+ delta2.setZero();
115
+ delta2.topRows(freen) = delta;
116
+
117
+ return delta2;
118
+ }
119
+
120
+ std::vector<torch::Tensor> solve_system(torch::Tensor J_Ginv_i, torch::Tensor J_Ginv_j, torch::Tensor ii, torch::Tensor jj, torch::Tensor res, float ep, float lm, int freen)
121
+ {
122
+
123
+ const torch::Device device = res.device();
124
+ J_Ginv_i = J_Ginv_i.to(torch::kCPU);
125
+ J_Ginv_j = J_Ginv_j.to(torch::kCPU);
126
+ ii = ii.to(torch::kCPU);
127
+ jj = jj.to(torch::kCPU);
128
+ res = res.clone().to(torch::kCPU);
129
+
130
+ const int r = res.size(0);
131
+ const int n = std::max(ii.max().item<long>(), jj.max().item<long>()) + 1;
132
+
133
+ res.resize_({r*7});
134
+ float *res_ptr = res.data_ptr<float>();
135
+ Eigen::Map<Eigen::VectorXf> v(res_ptr, r*7);
136
+
137
+ SpMat J(r*7, n*7);
138
+ std::vector<T> tripletList;
139
+ tripletList.reserve(r*7*7*2);
140
+
141
+ auto ii_acc = ii.accessor<long,1>();
142
+ auto jj_acc = jj.accessor<long,1>();
143
+ auto J_Ginv_i_acc = J_Ginv_i.accessor<float,3>();
144
+ auto J_Ginv_j_acc = J_Ginv_j.accessor<float,3>();
145
+
146
+ for (int x=0; x<r; x++){
147
+ const int i = ii_acc[x];
148
+ const int j = jj_acc[x];
149
+ for (int k=0; k<7; k++){
150
+ for (int l=0; l<7; l++){
151
+ if (i == j)
152
+ exit(1);
153
+ const float val_i = J_Ginv_i_acc[x][k][l];
154
+ tripletList.emplace_back(x*7 + k, i*7 + l, val_i);
155
+ const float val_j = J_Ginv_j_acc[x][k][l];
156
+ tripletList.emplace_back(x*7 + k, j*7 + l, val_j);
157
+ }
158
+ }
159
+ }
160
+
161
+ J.setFromTriplets(tripletList.begin(), tripletList.end());
162
+ const SpMat Jt = J.transpose();
163
+ Eigen::VectorXd b = -(Jt * v.cast<double>());
164
+ SpMat A = Jt * J;
165
+
166
+ A.diagonal() += (A.diagonal() * lm);
167
+ A.diagonal().array() += ep;
168
+ Eigen::VectorXf delta = solve(A, b, freen*7).cast<float>();
169
+
170
+ torch::Tensor delta_tensor = torch::from_blob(delta.data(), {n*7}).clone().to(device);
171
+ delta_tensor.resize_({n, 7});
172
+ return {delta_tensor};
173
+
174
+ Eigen::Matrix<float, -1, -1, Eigen::RowMajor> dense_J(J.cast<float>());
175
+ torch::Tensor dense_J_tensor = torch::from_blob(dense_J.data(), {r*7, n*7}).clone().to(device);
176
+ dense_J_tensor.resize_({r, 7, n, 7});
177
+
178
+ return {delta_tensor, dense_J_tensor};
179
+
180
+ }
181
+
182
+
183
+ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
184
+ m.def("forward", &ba, "BA forward operator");
185
+ m.def("neighbors", &neighbors, "temporal neighbor indices");
186
+ m.def("reproject", &reproject, "reproject patches into target frames");
187
+ m.def("solve_system", &solve_system, "solve sparse linearized pose-graph system");
188
+
189
+ }
third-party/DPVO/dpvo/fastba/ba.py ADDED
@@ -0,0 +1,8 @@
+ import torch
+ import cuda_ba
+
+ neighbors = cuda_ba.neighbors
+ reproject = cuda_ba.reproject
+
+ def BA(poses, patches, intrinsics, target, weight, lmbda, ii, jj, kk, t0, t1, M, iterations, eff_impl=False):
+     return cuda_ba.forward(poses.data, patches, intrinsics, target, weight, lmbda, ii, jj, kk, M, t0, t1, iterations, eff_impl)
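A hedged sketch of how this wrapper might be called, assuming the cuda_ba extension has been built and a GPU is available. All shapes and values are assumptions inferred from the .view() calls in the CUDA kernels (poses as N x 7 translation + quaternion, patches as M x 3 x P x P with x/y/inverse-depth channels, one target/weight row per edge); they are illustration values, not the values DPVO itself uses.

import torch
from dpvo.fastba import BA     # assumed module path (see __init__.py above)

N, P, E = 12, 3, 500           # hypothetical: poses in window, patch size, edges
M = 96 * N                     # hypothetical: 96 patches per frame

poses = torch.zeros(1, N, 7, device='cuda'); poses[..., 6] = 1.0   # identity quaternions
patches = torch.rand(1, M, 3, P, P, device='cuda')                 # x, y, inverse depth
intrinsics = torch.tensor([[320.0, 320.0, 320.0, 240.0]], device='cuda')  # fx fy cx cy

ii = torch.randint(0, N, (E,), device='cuda')   # source frame of each edge
jj = torch.randint(0, N, (E,), device='cuda')   # target frame of each edge
kk = torch.randint(0, M, (E,), device='cuda')   # patch index of each edge
target = torch.rand(E, 2, device='cuda') * 640  # predicted reprojection of each patch center
weight = torch.ones(E, 2, device='cuda')        # per-edge confidence weights
lmbda = torch.full((1,), 1e-4, device='cuda')   # Levenberg-Marquardt damping

# optimizes poses[t0:t1] and patch depths in place; M here is patches-per-frame (PPF)
BA(poses, patches, intrinsics, target, weight, lmbda, ii, jj, kk,
   t0=1, t1=N, M=96, iterations=2)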
third-party/DPVO/dpvo/fastba/ba_cuda.cu ADDED
@@ -0,0 +1,617 @@
1
+ #include <torch/extension.h>
2
+ #include <vector>
3
+ #include <iostream>
4
+ #include <fstream>
5
+ #include <string>
6
+ #include <memory>
7
+
8
+ #include <ATen/ATen.h>
9
+ #include <ATen/NativeFunctions.h>
10
+ #include <ATen/Parallel.h>
11
+ #include "block_e.cuh"
12
+
13
+
14
+ #define GPU_1D_KERNEL_LOOP(i, n) \
15
+ for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i<n; i += blockDim.x * gridDim.x)
16
+
17
+
18
+ #define NUM_THREADS 256
19
+ #define NUM_BLOCKS(batch_size) ((batch_size + NUM_THREADS - 1) / NUM_THREADS)
20
+
21
+ inline void release_assert(const char *file, int line, bool condition, const std::string &msg){
22
+ if (!condition)
23
+ throw std::runtime_error(std::string("Assertion failed: ") + file + " (" + std::to_string(line) + ")\n" + msg + "\n");
24
+ }
25
+
26
+ #define RASSERT(c) release_assert(__FILE__, __LINE__, c, "")
27
+ #define MRASSERT(c, m) release_assert(__FILE__, __LINE__, c, m)
28
+
29
+ void save(const char *filename, const torch::Tensor &data){
30
+ const auto pickled = torch::pickle_save(data);
31
+ std::ofstream fout(filename, std::ios::out | std::ios::binary);
32
+ fout.write(pickled.data(), pickled.size());
33
+ fout.close();
34
+ }
35
+
36
+ __device__ void
37
+ actSO3(const float *q, const float *X, float *Y) {
38
+ float uv[3];
39
+ uv[0] = 2.0 * (q[1]*X[2] - q[2]*X[1]);
40
+ uv[1] = 2.0 * (q[2]*X[0] - q[0]*X[2]);
41
+ uv[2] = 2.0 * (q[0]*X[1] - q[1]*X[0]);
42
+
43
+ Y[0] = X[0] + q[3]*uv[0] + (q[1]*uv[2] - q[2]*uv[1]);
44
+ Y[1] = X[1] + q[3]*uv[1] + (q[2]*uv[0] - q[0]*uv[2]);
45
+ Y[2] = X[2] + q[3]*uv[2] + (q[0]*uv[1] - q[1]*uv[0]);
46
+ }
47
+
48
+ __device__ void
49
+ actSE3(const float *t, const float *q, const float *X, float *Y) {
50
+ actSO3(q, X, Y);
51
+ Y[3] = X[3];
52
+ Y[0] += X[3] * t[0];
53
+ Y[1] += X[3] * t[1];
54
+ Y[2] += X[3] * t[2];
55
+ }
56
+
57
+ __device__ void
58
+ adjSE3(const float *t, const float *q, const float *X, float *Y) {
59
+ float qinv[4] = {-q[0], -q[1], -q[2], q[3]};
60
+ actSO3(qinv, &X[0], &Y[0]);
61
+ actSO3(qinv, &X[3], &Y[3]);
62
+
63
+ float u[3], v[3];
64
+ u[0] = t[2]*X[1] - t[1]*X[2];
65
+ u[1] = t[0]*X[2] - t[2]*X[0];
66
+ u[2] = t[1]*X[0] - t[0]*X[1];
67
+
68
+ actSO3(qinv, u, v);
69
+ Y[3] += v[0];
70
+ Y[4] += v[1];
71
+ Y[5] += v[2];
72
+ }
73
+
74
+ __device__ void
75
+ relSE3(const float *ti, const float *qi, const float *tj, const float *qj, float *tij, float *qij) {
76
+ qij[0] = -qj[3] * qi[0] + qj[0] * qi[3] - qj[1] * qi[2] + qj[2] * qi[1],
77
+ qij[1] = -qj[3] * qi[1] + qj[1] * qi[3] - qj[2] * qi[0] + qj[0] * qi[2],
78
+ qij[2] = -qj[3] * qi[2] + qj[2] * qi[3] - qj[0] * qi[1] + qj[1] * qi[0],
79
+ qij[3] = qj[3] * qi[3] + qj[0] * qi[0] + qj[1] * qi[1] + qj[2] * qi[2],
80
+
81
+ actSO3(qij, ti, tij);
82
+ tij[0] = tj[0] - tij[0];
83
+ tij[1] = tj[1] - tij[1];
84
+ tij[2] = tj[2] - tij[2];
85
+ }
86
+
87
+
88
+ __device__ void
89
+ expSO3(const float *phi, float* q) {
90
+ // SO3 exponential map
91
+ float theta_sq = phi[0]*phi[0] + phi[1]*phi[1] + phi[2]*phi[2];
92
+ float theta_p4 = theta_sq * theta_sq;
93
+
94
+ float theta = sqrtf(theta_sq);
95
+ float imag, real;
96
+
97
+ if (theta_sq < 1e-8) {
98
+ imag = 0.5 - (1.0/48.0)*theta_sq + (1.0/3840.0)*theta_p4;
99
+ real = 1.0 - (1.0/ 8.0)*theta_sq + (1.0/ 384.0)*theta_p4;
100
+ } else {
101
+ imag = sinf(0.5 * theta) / theta;
102
+ real = cosf(0.5 * theta);
103
+ }
104
+
105
+ q[0] = imag * phi[0];
106
+ q[1] = imag * phi[1];
107
+ q[2] = imag * phi[2];
108
+ q[3] = real;
109
+
110
+ }
111
+
112
+ __device__ void
113
+ crossInplace(const float* a, float *b) {
114
+ float x[3] = {
115
+ a[1]*b[2] - a[2]*b[1],
116
+ a[2]*b[0] - a[0]*b[2],
117
+ a[0]*b[1] - a[1]*b[0],
118
+ };
119
+
120
+ b[0] = x[0];
121
+ b[1] = x[1];
122
+ b[2] = x[2];
123
+ }
124
+
125
+ __device__ void
126
+ expSE3(const float *xi, float* t, float* q) {
127
+ // SE3 exponential map
128
+
129
+ expSO3(xi + 3, q);
130
+ float tau[3] = {xi[0], xi[1], xi[2]};
131
+ float phi[3] = {xi[3], xi[4], xi[5]};
132
+
133
+ float theta_sq = phi[0]*phi[0] + phi[1]*phi[1] + phi[2]*phi[2];
134
+ float theta = sqrtf(theta_sq);
135
+
136
+ t[0] = tau[0];
137
+ t[1] = tau[1];
138
+ t[2] = tau[2];
139
+
140
+ if (theta > 1e-4) {
141
+ float a = (1 - cosf(theta)) / theta_sq;
142
+ crossInplace(phi, tau);
143
+ t[0] += a * tau[0];
144
+ t[1] += a * tau[1];
145
+ t[2] += a * tau[2];
146
+
147
+ float b = (theta - sinf(theta)) / (theta * theta_sq);
148
+ crossInplace(phi, tau);
149
+ t[0] += b * tau[0];
150
+ t[1] += b * tau[1];
151
+ t[2] += b * tau[2];
152
+ }
153
+ }
154
+
155
+
156
+ __device__ void
157
+ retrSE3(const float *xi, const float* t, const float* q, float* t1, float* q1) {
158
+ // retraction on SE3 manifold
159
+
160
+ float dt[3] = {0, 0, 0};
161
+ float dq[4] = {0, 0, 0, 1};
162
+
163
+ expSE3(xi, dt, dq);
164
+
165
+ q1[0] = dq[3] * q[0] + dq[0] * q[3] + dq[1] * q[2] - dq[2] * q[1];
166
+ q1[1] = dq[3] * q[1] + dq[1] * q[3] + dq[2] * q[0] - dq[0] * q[2];
167
+ q1[2] = dq[3] * q[2] + dq[2] * q[3] + dq[0] * q[1] - dq[1] * q[0];
168
+ q1[3] = dq[3] * q[3] - dq[0] * q[0] - dq[1] * q[1] - dq[2] * q[2];
169
+
170
+ actSO3(dq, t, t1);
171
+ t1[0] += dt[0];
172
+ t1[1] += dt[1];
173
+ t1[2] += dt[2];
174
+ }
175
+
176
+
177
+
178
+ __global__ void pose_retr_kernel(const int t0, const int t1,
179
+ torch::PackedTensorAccessor32<float,2,torch::RestrictPtrTraits> poses,
180
+ torch::PackedTensorAccessor32<mtype,2,torch::RestrictPtrTraits> update)
181
+ {
182
+ GPU_1D_KERNEL_LOOP(i, t1 - t0) {
183
+ const float t = t0 + i;
184
+ float t1[3], t0[3] = { poses[t][0], poses[t][1], poses[t][2] };
185
+ float q1[4], q0[4] = { poses[t][3], poses[t][4], poses[t][5], poses[t][6] };
186
+
187
+ float xi[6] = {
188
+ update[i][0],
189
+ update[i][1],
190
+ update[i][2],
191
+ update[i][3],
192
+ update[i][4],
193
+ update[i][5],
194
+ };
195
+
196
+ retrSE3(xi, t0, q0, t1, q1);
197
+
198
+ poses[t][0] = t1[0];
199
+ poses[t][1] = t1[1];
200
+ poses[t][2] = t1[2];
201
+ poses[t][3] = q1[0];
202
+ poses[t][4] = q1[1];
203
+ poses[t][5] = q1[2];
204
+ poses[t][6] = q1[3];
205
+ }
206
+ }
207
+
208
+
209
+ __global__ void patch_retr_kernel(
210
+ torch::PackedTensorAccessor32<long,1,torch::RestrictPtrTraits> index,
211
+ torch::PackedTensorAccessor32<float,4,torch::RestrictPtrTraits> patches,
212
+ torch::PackedTensorAccessor32<mtype,1,torch::RestrictPtrTraits> update)
213
+ {
214
+ GPU_1D_KERNEL_LOOP(n, index.size(0)) {
215
+ const int p = patches.size(2);
216
+ const int ix = index[n];
217
+
218
+ float d = patches[ix][2][0][0];
219
+ d = d + update[n];
220
+ d = (d > 20) ? 1.0 : d;
221
+ d = max(d, 1e-4);
222
+
223
+ for (int i=0; i<p; i++) {
224
+ for (int j=0; j<p; j++) {
225
+ patches[ix][2][i][j] = d;
226
+ }
227
+ }
228
+ }
229
+ }
230
+
231
+
232
+ __global__ void reprojection_residuals_and_hessian(
233
+ const torch::PackedTensorAccessor32<float,2,torch::RestrictPtrTraits> poses,
234
+ const torch::PackedTensorAccessor32<float,4,torch::RestrictPtrTraits> patches,
235
+ const torch::PackedTensorAccessor32<float,2,torch::RestrictPtrTraits> intrinsics,
236
+ const torch::PackedTensorAccessor32<float,2,torch::RestrictPtrTraits> target,
237
+ const torch::PackedTensorAccessor32<float,2,torch::RestrictPtrTraits> weight,
238
+ const torch::PackedTensorAccessor32<float,1,torch::RestrictPtrTraits> lmbda,
239
+ const torch::PackedTensorAccessor32<long,2,torch::RestrictPtrTraits> ij_xself,
240
+ const torch::PackedTensorAccessor32<long,1,torch::RestrictPtrTraits> ii,
241
+ const torch::PackedTensorAccessor32<long,1,torch::RestrictPtrTraits> jj,
242
+ const torch::PackedTensorAccessor32<long,1,torch::RestrictPtrTraits> kk,
243
+ const torch::PackedTensorAccessor32<long,1,torch::RestrictPtrTraits> ku,
244
+ torch::PackedTensorAccessor32<double,1,torch::RestrictPtrTraits> r_total,
245
+ torch::PackedTensorAccessor32<mtype,3,torch::RestrictPtrTraits> E_lookup,
246
+ torch::PackedTensorAccessor32<mtype,2,torch::RestrictPtrTraits> B,
247
+ torch::PackedTensorAccessor32<mtype,2,torch::RestrictPtrTraits> E,
248
+ torch::PackedTensorAccessor32<mtype,1,torch::RestrictPtrTraits> C,
249
+ torch::PackedTensorAccessor32<mtype,1,torch::RestrictPtrTraits> v,
250
+ torch::PackedTensorAccessor32<mtype,1,torch::RestrictPtrTraits> u, const int t0, const int ppf)
251
+ {
252
+
253
+ __shared__ float fx, fy, cx, cy;
254
+ if (threadIdx.x == 0) {
255
+ fx = intrinsics[0][0];
256
+ fy = intrinsics[0][1];
257
+ cx = intrinsics[0][2];
258
+ cy = intrinsics[0][3];
259
+ }
260
+
261
+ bool eff_impl = (ppf > 0);
262
+
263
+ __syncthreads();
264
+
265
+ GPU_1D_KERNEL_LOOP(n, ii.size(0)) {
266
+ int k = ku[n]; // inverse indices
267
+ int ix = ii[n];
268
+ int jx = jj[n];
269
+ int kx = kk[n]; // actual
270
+ int ijx, ijs;
271
+ if (eff_impl){
272
+ ijx = ij_xself[0][n];
273
+ ijs = ij_xself[1][n];
274
+ }
275
+
276
+ float ti[3] = { poses[ix][0], poses[ix][1], poses[ix][2] };
277
+ float tj[3] = { poses[jx][0], poses[jx][1], poses[jx][2] };
278
+ float qi[4] = { poses[ix][3], poses[ix][4], poses[ix][5], poses[ix][6] };
279
+ float qj[4] = { poses[jx][3], poses[jx][4], poses[jx][5], poses[jx][6] };
280
+
281
+ float Xi[4], Xj[4];
282
+ Xi[0] = (patches[kx][0][1][1] - cx) / fx;
283
+ Xi[1] = (patches[kx][1][1][1] - cy) / fy;
284
+ Xi[2] = 1.0;
285
+ Xi[3] = patches[kx][2][1][1];
286
+
287
+ float tij[3], qij[4];
288
+ relSE3(ti, qi, tj, qj, tij, qij);
289
+ actSE3(tij, qij, Xi, Xj);
290
+
291
+ const float X = Xj[0];
292
+ const float Y = Xj[1];
293
+ const float Z = Xj[2];
294
+ const float W = Xj[3];
295
+
296
+ const float d = (Z >= 0.2) ? 1.0 / Z : 0.0;
297
+ const float d2 = d * d;
298
+
299
+ const float x1 = fx * (X / Z) + cx;
300
+ const float y1 = fy * (Y / Z) + cy;
301
+
302
+ const float rx = target[n][0] - x1;
303
+ const float ry = target[n][1] - y1;
304
+
305
+ const bool in_bounds = (sqrt(rx*rx + ry*ry) < 128) && (Z > 0.2) &&
306
+ (x1 > -64) && (y1 > -64) && (x1 < 2*cx + 64) && (y1 < 2*cy + 64);
307
+
308
+ const float mask = in_bounds ? 1.0 : 0.0;
309
+
310
+ ix = ix - t0;
311
+ jx = jx - t0;
312
+
313
+ for (int row=0; row<2; row++) {
314
+
315
+ float *Jj, Ji[6], Jz, r, w;
316
+
317
+ if (row == 0){
318
+
319
+ r = target[n][0] - x1;
320
+ w = mask * weight[n][0];
321
+
322
+ Jz = fx * (tij[0] * d - tij[2] * (X * d2));
323
+ Jj = (float[6]){fx*W*d, 0, fx*-X*W*d2, fx*-X*Y*d2, fx*(1+X*X*d2), fx*-Y*d};
324
+
325
+ } else {
326
+
327
+ r = target[n][1] - y1;
328
+ w = mask * weight[n][1];
329
+
330
+ Jz = fy * (tij[1] * d - tij[2] * (Y * d2));
331
+ Jj = (float[6]){0, fy*W*d, fy*-Y*W*d2, fy*(-1-Y*Y*d2), fy*(X*Y*d2), fy*X*d};
332
+
333
+ }
334
+
335
+ atomicAdd(&r_total[0], w * r * r);
336
+
337
+ adjSE3(tij, qij, Jj, Ji);
338
+
339
+ for (int i=0; i<6; i++) {
340
+ for (int j=0; j<6; j++) {
341
+ if (ix >= 0)
342
+ atomicAdd(&B[6*ix+i][6*ix+j], w * Ji[i] * Ji[j]);
343
+ if (jx >= 0)
344
+ atomicAdd(&B[6*jx+i][6*jx+j], w * Jj[i] * Jj[j]);
345
+ if (ix >= 0 && jx >= 0) {
346
+ atomicAdd(&B[6*ix+i][6*jx+j], -w * Ji[i] * Jj[j]);
347
+ atomicAdd(&B[6*jx+i][6*ix+j], -w * Jj[i] * Ji[j]);
348
+ }
349
+ }
350
+ }
351
+
352
+ for (int i=0; i<6; i++) {
353
+ if (eff_impl){
354
+ atomicAdd(&E_lookup[ijs][kx % ppf][i], -w * Jz * Ji[i]);
355
+ atomicAdd(&E_lookup[ijx][kx % ppf][i], w * Jz * Jj[i]);
356
+ } else {
357
+ if (ix >= 0)
358
+ atomicAdd(&E[6*ix+i][k], -w * Jz * Ji[i]);
359
+ if (jx >= 0)
360
+ atomicAdd(&E[6*jx+i][k], w * Jz * Jj[i]);
361
+ }
362
+
363
+ }
364
+
365
+ for (int i=0; i<6; i++) {
366
+ if (ix >= 0)
367
+ atomicAdd(&v[6*ix+i], -w * r * Ji[i]);
368
+ if (jx >= 0)
369
+ atomicAdd(&v[6*jx+i], w * r * Jj[i]);
370
+ }
371
+
372
+ atomicAdd(&C[k], w * Jz * Jz);
373
+ atomicAdd(&u[k], w * r * Jz);
374
+ }
375
+ }
376
+ }
377
+
378
+
379
+ __global__ void reproject(
380
+ const torch::PackedTensorAccessor32<float,2,torch::RestrictPtrTraits> poses,
381
+ const torch::PackedTensorAccessor32<float,4,torch::RestrictPtrTraits> patches,
382
+ const torch::PackedTensorAccessor32<float,2,torch::RestrictPtrTraits> intrinsics,
383
+ const torch::PackedTensorAccessor32<long,1,torch::RestrictPtrTraits> ii,
384
+ const torch::PackedTensorAccessor32<long,1,torch::RestrictPtrTraits> jj,
385
+ const torch::PackedTensorAccessor32<long,1,torch::RestrictPtrTraits> kk,
386
+ torch::PackedTensorAccessor32<float,4,torch::RestrictPtrTraits> coords) {
387
+
388
+ __shared__ float fx, fy, cx, cy;
389
+ if (threadIdx.x == 0) {
390
+ fx = intrinsics[0][0];
391
+ fy = intrinsics[0][1];
392
+ cx = intrinsics[0][2];
393
+ cy = intrinsics[0][3];
394
+ }
395
+
396
+ __syncthreads();
397
+
398
+ GPU_1D_KERNEL_LOOP(n, ii.size(0)) {
399
+ int ix = ii[n];
400
+ int jx = jj[n];
401
+ int kx = kk[n];
402
+
403
+ float ti[3] = { poses[ix][0], poses[ix][1], poses[ix][2] };
404
+ float tj[3] = { poses[jx][0], poses[jx][1], poses[jx][2] };
405
+ float qi[4] = { poses[ix][3], poses[ix][4], poses[ix][5], poses[ix][6] };
406
+ float qj[4] = { poses[jx][3], poses[jx][4], poses[jx][5], poses[jx][6] };
407
+
408
+ float tij[3], qij[4];
409
+ relSE3(ti, qi, tj, qj, tij, qij);
410
+
411
+ float Xi[4], Xj[4];
412
+ for (int i=0; i<patches.size(2); i++) {
413
+ for (int j=0; j<patches.size(3); j++) {
414
+
415
+ Xi[0] = (patches[kx][0][i][j] - cx) / fx;
416
+ Xi[1] = (patches[kx][1][i][j] - cy) / fy;
417
+ Xi[2] = 1.0;
418
+ Xi[3] = patches[kx][2][i][j];
419
+
420
+ actSE3(tij, qij, Xi, Xj);
421
+
422
+ coords[n][0][i][j] = fx * (Xj[0] / Xj[2]) + cx;
423
+ coords[n][1][i][j] = fy * (Xj[1] / Xj[2]) + cy;
424
+ // coords[n][2][i][j] = 1.0 / Xj[2];
425
+
426
+ }
427
+ }
428
+ }
429
+ }
430
+
431
+
432
+
433
+ std::vector<torch::Tensor> cuda_ba(
434
+ torch::Tensor poses,
435
+ torch::Tensor patches,
436
+ torch::Tensor intrinsics,
437
+ torch::Tensor target,
438
+ torch::Tensor weight,
439
+ torch::Tensor lmbda,
440
+ torch::Tensor ii,
441
+ torch::Tensor jj,
442
+ torch::Tensor kk,
443
+ const int PPF,
444
+ const int t0, const int t1, const int iterations, bool eff_impl)
445
+ {
446
+
447
+ auto ktuple = torch::_unique(kk, true, true);
448
+ torch::Tensor kx = std::get<0>(ktuple);
449
+ torch::Tensor ku = std::get<1>(ktuple);
450
+
451
+ const int N = t1 - t0; // number of poses
452
+ const int M = kx.size(0); // number of patches
453
+ const int P = patches.size(3); // patch size
454
+
455
+ // auto opts = torch::TensorOptions()
456
+ // .dtype(torch::kFloat32).device(torch::kCUDA);
457
+
458
+ poses = poses.view({-1, 7});
459
+ patches = patches.view({-1,3,P,P});
460
+ intrinsics = intrinsics.view({-1, 4});
461
+
462
+ target = target.view({-1, 2});
463
+ weight = weight.view({-1, 2});
464
+
465
+ const int num = ii.size(0);
466
+ torch::Tensor B = torch::empty({6*N, 6*N}, mdtype);
467
+ torch::Tensor E = torch::empty({0, 0}, mdtype);
468
+ torch::Tensor C = torch::empty({M}, mdtype);
469
+
470
+ torch::Tensor v = torch::empty({6*N}, mdtype);
471
+ torch::Tensor u = torch::empty({1*M}, mdtype);
472
+
473
+ torch::Tensor r_total = torch::empty({1}, torch::dtype(torch::kFloat64).device(torch::kCUDA));
474
+
475
+ auto blockE = std::make_unique<EfficentE>();
476
+
477
+ if (eff_impl)
478
+ blockE = std::make_unique<EfficentE>(ii, jj, kx, PPF, t0);
479
+ else
480
+ E = torch::empty({6*N, 1*M}, mdtype);
481
+
482
+ for (int itr=0; itr < iterations; itr++) {
483
+
484
+ B.zero_();
485
+ E.zero_();
486
+ C.zero_();
487
+ v.zero_();
488
+ u.zero_();
489
+ r_total.zero_();
490
+ blockE->E_lookup.zero_();
491
+
492
+ v = v.view({6*N});
493
+ u = u.view({1*M});
494
+
495
+ reprojection_residuals_and_hessian<<<NUM_BLOCKS(ii.size(0)), NUM_THREADS>>>(
496
+ poses.packed_accessor32<float,2,torch::RestrictPtrTraits>(),
497
+ patches.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
498
+ intrinsics.packed_accessor32<float,2,torch::RestrictPtrTraits>(),
499
+ target.packed_accessor32<float,2,torch::RestrictPtrTraits>(),
500
+ weight.packed_accessor32<float,2,torch::RestrictPtrTraits>(),
501
+ lmbda.packed_accessor32<float,1,torch::RestrictPtrTraits>(),
502
+ blockE->ij_xself.packed_accessor32<long,2,torch::RestrictPtrTraits>(),
503
+ ii.packed_accessor32<long,1,torch::RestrictPtrTraits>(),
504
+ jj.packed_accessor32<long,1,torch::RestrictPtrTraits>(),
505
+ kk.packed_accessor32<long,1,torch::RestrictPtrTraits>(),
506
+ ku.packed_accessor32<long,1,torch::RestrictPtrTraits>(),
507
+ r_total.packed_accessor32<double,1,torch::RestrictPtrTraits>(),
508
+ blockE->E_lookup.packed_accessor32<mtype,3,torch::RestrictPtrTraits>(),
509
+ B.packed_accessor32<mtype,2,torch::RestrictPtrTraits>(),
510
+ E.packed_accessor32<mtype,2,torch::RestrictPtrTraits>(),
511
+ C.packed_accessor32<mtype,1,torch::RestrictPtrTraits>(),
512
+ v.packed_accessor32<mtype,1,torch::RestrictPtrTraits>(),
513
+ u.packed_accessor32<mtype,1,torch::RestrictPtrTraits>(), t0, blockE->ppf);
514
+
515
+ // std::cout << "Total residuals: " << r_total.item<double>() << std::endl;
516
+ v = v.view({6*N, 1});
517
+ u = u.view({1*M, 1});
518
+
519
+ torch::Tensor Q = 1.0 / (C + lmbda).view({1, M});
520
+
521
+ if (t1 - t0 == 0) {
522
+
523
+ torch::Tensor Qt = torch::transpose(Q, 0, 1);
524
+ torch::Tensor dZ = Qt * u;
525
+
526
+ dZ = dZ.view({M});
527
+
528
+ patch_retr_kernel<<<NUM_BLOCKS(M), NUM_THREADS>>>(
529
+ kx.packed_accessor32<long,1,torch::RestrictPtrTraits>(),
530
+ patches.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
531
+ dZ.packed_accessor32<mtype,1,torch::RestrictPtrTraits>());
532
+
533
+ } else {
534
+
535
+ torch::Tensor dX, dZ, Qt = torch::transpose(Q, 0, 1);
536
+ torch::Tensor I = torch::eye(6*N, mdtype);
537
+
538
+ if (eff_impl) {
539
+
540
+ torch::Tensor EQEt = blockE->computeEQEt(N, Q);
541
+ torch::Tensor EQu = blockE->computeEv(N, Qt * u);
542
+
543
+ torch::Tensor S = B - EQEt;
544
+ torch::Tensor y = v - EQu;
545
+
546
+ S += I * (1e-4 * S + 1.0);
547
+ torch::Tensor U = std::get<0>(at::linalg_cholesky_ex(S));
548
+ dX = torch::cholesky_solve(y, U);
549
+ torch::Tensor EtdX = blockE->computeEtv(M, dX);
550
+ dZ = Qt * (u - EtdX);
551
+
552
+ } else {
553
+
554
+ torch::Tensor EQ = E * Q;
555
+ torch::Tensor Et = torch::transpose(E, 0, 1);
556
+
557
+ torch::Tensor S = B - torch::matmul(EQ, Et);
558
+ torch::Tensor y = v - torch::matmul(EQ, u);
559
+
560
+ S += I * (1e-4 * S + 1.0);
561
+ torch::Tensor U = std::get<0>(at::linalg_cholesky_ex(S));
562
+ dX = torch::cholesky_solve(y, U);
563
+ dZ = Qt * (u - torch::matmul(Et, dX));
564
+
565
+ }
566
+
567
+ dX = dX.view({N, 6});
568
+ dZ = dZ.view({M});
569
+
570
+ pose_retr_kernel<<<NUM_BLOCKS(N), NUM_THREADS>>>(t0, t1,
571
+ poses.packed_accessor32<float,2,torch::RestrictPtrTraits>(),
572
+ dX.packed_accessor32<mtype,2,torch::RestrictPtrTraits>());
573
+
574
+ patch_retr_kernel<<<NUM_BLOCKS(M), NUM_THREADS>>>(
575
+ kx.packed_accessor32<long,1,torch::RestrictPtrTraits>(),
576
+ patches.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
577
+ dZ.packed_accessor32<mtype,1,torch::RestrictPtrTraits>());
578
+ }
579
+ }
580
+
581
+ return {};
582
+ }
583
+
584
+
585
+ torch::Tensor cuda_reproject(
586
+ torch::Tensor poses,
587
+ torch::Tensor patches,
588
+ torch::Tensor intrinsics,
589
+ torch::Tensor ii,
590
+ torch::Tensor jj,
591
+ torch::Tensor kk)
592
+ {
593
+
594
+ const int N = ii.size(0);
595
+ const int P = patches.size(3); // patch size
596
+
597
+ poses = poses.view({-1, 7});
598
+ patches = patches.view({-1,3,P,P});
599
+ intrinsics = intrinsics.view({-1, 4});
600
+
601
+ auto opts = torch::TensorOptions()
602
+ .dtype(torch::kFloat32).device(torch::kCUDA);
603
+
604
+ torch::Tensor coords = torch::empty({N, 2, P, P}, opts);
605
+
606
+ reproject<<<NUM_BLOCKS(N), NUM_THREADS>>>(
607
+ poses.packed_accessor32<float,2,torch::RestrictPtrTraits>(),
608
+ patches.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
609
+ intrinsics.packed_accessor32<float,2,torch::RestrictPtrTraits>(),
610
+ ii.packed_accessor32<long,1,torch::RestrictPtrTraits>(),
611
+ jj.packed_accessor32<long,1,torch::RestrictPtrTraits>(),
612
+ kk.packed_accessor32<long,1,torch::RestrictPtrTraits>(),
613
+ coords.packed_accessor32<float,4,torch::RestrictPtrTraits>());
614
+
615
+ return coords.view({1, N, 2, P, P});
616
+
617
+ }
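For readers tracing the solver above: each iteration eliminates the per-patch depth variables with a Schur complement before solving for the 6-DoF pose updates. The short PyTorch sketch below mirrors the dense (eff_impl == false) branch with made-up sizes and random data; it is an illustration of the linear-algebra step, not a drop-in replacement for the kernel.

import torch

N, M = 4, 30                              # hypothetical: poses in window, patches
B = 10.0 * torch.eye(6 * N)               # pose-pose Hessian block
E = 0.1 * torch.randn(6 * N, M)           # pose-depth coupling block
C = 1.0 + torch.rand(M)                   # diagonal depth Hessian
v = torch.randn(6 * N)                    # pose-side gradient
u = torch.randn(M)                        # depth-side gradient
lmbda = 1e-4

Q = 1.0 / (C + lmbda)                     # invert the diagonal depth block
S = B - E @ torch.diag(Q) @ E.T           # Schur complement on the poses
y = v - E @ (Q * u)
S = S + torch.eye(6 * N) * (1e-4 * S + 1.0)       # same diagonal damping as the kernel
L = torch.linalg.cholesky(S)
dX = torch.cholesky_solve(y.unsqueeze(1), L)      # pose update, (6N, 1)
dZ = Q * (u - E.T @ dX.squeeze(1))                # back-substituted depth update, (M,)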
third-party/DPVO/dpvo/fastba/block_e.cu ADDED
@@ -0,0 +1,300 @@
1
+ #include <torch/extension.h>
2
+ #include <vector>
3
+ #include <unordered_set>
4
+ #include <iostream>
5
+ #include <fstream>
6
+ #include <string>
7
+ #include <chrono>
8
+ #include <ATen/ATen.h>
9
+ #include <ATen/NativeFunctions.h>
10
+ #include <ATen/Parallel.h>
11
+ #include <Eigen/Core>
12
+ #include "block_e.cuh"
13
+
14
+ #define GPU_1D_KERNEL_LOOP(i, n) \
15
+ for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x)
16
+
17
+ #define NUM_THREADS 256
18
+ #define NUM_BLOCKS(batch_size) ((batch_size + NUM_THREADS - 1) / NUM_THREADS)
19
+
20
+ inline void release_assert(const char *file, int line, bool condition, const std::string &msg)
21
+ {
22
+ if (!condition)
23
+ {
24
+ std::cout << (std::string("Assertion failed: ") + file + " (" + std::to_string(line) + ")\n" + msg + "\n") << std::endl;
25
+ exit(1);
26
+ }
27
+ }
28
+
29
+ #define RASSERT(c) release_assert(__FILE__, __LINE__, c, "<no-message>")
30
+ #define MRASSERT(c, m) release_assert(__FILE__, __LINE__, c, m)
31
+
32
+ #define CREATE_IDX_ACC(t, d) \
33
+ const auto cpu_##t = t.to(torch::kCPU); \
34
+ const auto acc_##t = cpu_##t.accessor<long, d>();
35
+
36
+ typedef Eigen::Array<long, -1, -1> IndexLookup;
37
+
38
+ EfficentE::EfficentE() : ppf(0), t0(0) {
39
+ E_lookup = torch::empty({0, 0, 0}, mdtype);
40
+ ij_xself = torch::empty({2, 0}, torch::dtype(torch::kInt64).device(torch::kCUDA));
41
+ }
42
+
43
+ EfficentE::EfficentE(const torch::Tensor &ii, const torch::Tensor &jj, const torch::Tensor &ku, const int patches_per_frame, const int t0) : ppf(patches_per_frame), t0(t0)
44
+ {
45
+ const long n_frames = std::max(ii.max().item<long>(), jj.max().item<long>()) + 1;
46
+ const auto ij_tuple = torch::_unique(torch::cat({ii * n_frames + jj, ii * n_frames + ii}), true, true);
47
+ torch::Tensor ij_uniq = std::get<0>(ij_tuple);
48
+
49
+ const long E = ii.size(0);
50
+ ij_xself = std::get<1>(ij_tuple).view({2, E});
51
+ E_lookup = torch::zeros({ij_uniq.size(0), ppf, 6}, mdtype);
52
+
53
+ { // Create mapping from (frame, patch) -> index in vec
54
+ patch_to_ku = torch::full({n_frames, ppf}, -1, torch::kInt64);
55
+ auto patch_to_ku_acc = patch_to_ku.accessor<long, 2>();
56
+ CREATE_IDX_ACC(ku, 1)
57
+ for (int idx = 0; idx < cpu_ku.size(0); idx++)
58
+ {
59
+ const long k = acc_ku[idx]; // the actual uniq value. idx is the row in Q where it was found
60
+ // RASSERT((patch_to_ku_acc[k / ppf][k % ppf] == idx) || (patch_to_ku_acc[k / ppf][k % ppf] == -1));
61
+ patch_to_ku_acc[k / ppf][k % ppf] = idx;
62
+ }
63
+ }
64
+ patch_to_ku = patch_to_ku.to(torch::kCUDA);
65
+
66
+ { // Create mapping from (i,j) -> E_lookup
67
+ IndexLookup frame_to_idx = IndexLookup::Constant(n_frames, n_frames, -1);
68
+ CREATE_IDX_ACC(ii, 1)
69
+ CREATE_IDX_ACC(jj, 1)
70
+ CREATE_IDX_ACC(ij_xself, 2)
71
+
72
+ for (int idx = 0; idx < E; idx++)
73
+ {
74
+ const long i = acc_ii[idx];
75
+ const long j = acc_jj[idx];
76
+ const long ijx = acc_ij_xself[0][idx];
77
+ const long ijs = acc_ij_xself[1][idx];
78
+ // RASSERT((frame_to_idx(i, j) == ijx) || (frame_to_idx(i, j) == -1));
79
+ // RASSERT((frame_to_idx(i, i) == ijs) || (frame_to_idx(i, i) == -1));
80
+ frame_to_idx(i, j) = ijx;
81
+ frame_to_idx(i, i) = ijs;
82
+ }
83
+
84
+ // lookup table for edges
85
+ const long E = cpu_ii.size(0);
86
+ std::vector<std::unordered_set<long>> edge_lookup(n_frames);
87
+ for (int x = 0; x < E; x++)
88
+ {
89
+ const long i = acc_ii[x];
90
+ const long j = acc_jj[x];
91
+ edge_lookup[i].insert(j);
92
+ edge_lookup[i].insert(i);
93
+ // RASSERT(j < n_frames);
94
+ // RASSERT(i < n_frames);
95
+ // MRASSERT(edge_lookup[i].size() < 30, "More edges than expected");
96
+ }
97
+ // std::cout << "#U" << std::endl;
98
+
99
+ int count = 0;
100
+ for (const auto &connected_frames : edge_lookup)
101
+ count += (connected_frames.size() * connected_frames.size());
102
+
103
+ // std::cout << "#V" << std::endl;
104
+ index_tensor = torch::empty({count, 5}, torch::kInt64);
105
+ auto index_tensor_acc = index_tensor.accessor<long, 2>();
106
+ // std::cout << "#W" << std::endl;
107
+
108
+ int cx = 0;
109
+ for (int i = 0; i < n_frames; i++)
110
+ {
111
+ const auto &connected_frames = edge_lookup[i];
112
+ for (const long &j1 : connected_frames)
113
+ {
114
+ for (const long &j2 : connected_frames)
115
+ {
116
+ index_tensor_acc[cx][0] = i;
117
+ index_tensor_acc[cx][1] = j1;
118
+ index_tensor_acc[cx][2] = j2;
119
+ index_tensor_acc[cx][3] = frame_to_idx(i, j1);
120
+ index_tensor_acc[cx][4] = frame_to_idx(i, j2);
121
+ cx += 1;
122
+ }
123
+ }
124
+ }
125
+ index_tensor = index_tensor.to(torch::kCUDA);
126
+ // RASSERT(cx == count);
127
+ }
128
+
129
+ {
130
+ CREATE_IDX_ACC(ij_uniq, 1)
131
+ const long count = ij_uniq.size(0);
132
+ block_index_tensor = torch::empty({count, 2}, torch::kInt64);
133
+ auto index_tensor_acc = block_index_tensor.accessor<long, 2>();
134
+ for (int idx = 0; idx < count; idx++)
135
+ {
136
+ const long ij = acc_ij_uniq[idx];
137
+ const long i = ij / n_frames;
138
+ const long j = ij % n_frames;
139
+
140
+ index_tensor_acc[idx][0] = i;
141
+ index_tensor_acc[idx][1] = j;
142
+ }
143
+ block_index_tensor = block_index_tensor.to(torch::kCUDA);
144
+ }
145
+ }
146
+
147
+ __global__ void EEt_kernel(
148
+ torch::PackedTensorAccessor32<mtype, 2, torch::RestrictPtrTraits> EEt,
149
+ const torch::PackedTensorAccessor32<mtype, 3, torch::RestrictPtrTraits> E_lookup,
150
+ const torch::PackedTensorAccessor32<mtype, 1, torch::RestrictPtrTraits> Q,
151
+ const torch::PackedTensorAccessor32<long, 2, torch::RestrictPtrTraits> index_tensor,
152
+ const torch::PackedTensorAccessor32<long, 2, torch::RestrictPtrTraits> patch_to_ku, const int t0, const int ppf)
153
+ {
154
+
155
+ GPU_1D_KERNEL_LOOP(n, index_tensor.size(0) * ppf)
156
+ {
157
+
158
+ int k = n % ppf; // src patch
159
+ int idx = n / ppf;
160
+ int i = index_tensor[idx][0]; // src frame
161
+ int j1 = index_tensor[idx][1]; // dest j1
162
+ int j2 = index_tensor[idx][2]; // dest j2
163
+
164
+ int j1_idx = index_tensor[idx][3]; // index for first slice
165
+ int j2_idx = index_tensor[idx][4]; // index for second slice
166
+
167
+ const auto j1_slice = E_lookup[j1_idx][k]; // 6
168
+ const auto j2_slice = E_lookup[j2_idx][k]; // 6
169
+
170
+ j1 = j1 - t0;
171
+ j2 = j2 - t0;
172
+
173
+ for (int xi = 0; xi < 6; xi++)
174
+ {
175
+ for (int xj = 0; xj < 6; xj++)
176
+ {
177
+ if ((j1 >= 0) && (j2 >= 0))
178
+ {
179
+ long q_idx = patch_to_ku[i][k];
180
+ float q = Q[q_idx];
181
+ atomicAdd(&EEt[6 * j1 + xi][6 * j2 + xj], j1_slice[xi] * j2_slice[xj] * q);
182
+ }
183
+ }
184
+ }
185
+ }
186
+ }
187
+
188
+ torch::Tensor EfficentE::computeEQEt(const int N, const torch::Tensor &Q) const
189
+ {
190
+ torch::Tensor EEt = torch::zeros({6 * N, 6 * N}, mdtype);
191
+ const auto tmp_Q = Q.view({-1});
192
+
193
+ EEt_kernel<<<NUM_BLOCKS(index_tensor.size(0) * ppf), NUM_THREADS>>>(
194
+ EEt.packed_accessor32<mtype, 2, torch::RestrictPtrTraits>(),
195
+ E_lookup.packed_accessor32<mtype, 3, torch::RestrictPtrTraits>(),
196
+ tmp_Q.packed_accessor32<mtype, 1, torch::RestrictPtrTraits>(),
197
+ index_tensor.packed_accessor32<long, 2, torch::RestrictPtrTraits>(),
198
+ patch_to_ku.packed_accessor32<long, 2, torch::RestrictPtrTraits>(),
199
+ t0, ppf);
200
+
201
+ return EEt;
202
+ }
203
+
204
+ __global__ void Ev_kernel(
205
+ torch::PackedTensorAccessor32<mtype, 1, torch::RestrictPtrTraits> Ev,
206
+ const torch::PackedTensorAccessor32<mtype, 3, torch::RestrictPtrTraits> E_lookup,
207
+ const torch::PackedTensorAccessor32<mtype, 1, torch::RestrictPtrTraits> vec,
208
+ const torch::PackedTensorAccessor32<long, 2, torch::RestrictPtrTraits> index_tensor,
209
+ const torch::PackedTensorAccessor32<long, 2, torch::RestrictPtrTraits> patch_to_ku, const int t0, const int ppf)
210
+ {
211
+
212
+ GPU_1D_KERNEL_LOOP(n, index_tensor.size(0) * ppf)
213
+ {
214
+
215
+ int k = n % ppf; // src patch
216
+ int idx = n / ppf;
217
+ int i = index_tensor[idx][0];
218
+ int j = index_tensor[idx][1];
219
+
220
+ auto slice = E_lookup[idx][k]; // 6
221
+ long q_idx = patch_to_ku[i][k];
222
+ float v = vec[q_idx];
223
+
224
+ j = j - t0; // i not used anymore
225
+
226
+ for (int r = 0; r < 6; r++)
227
+ {
228
+ if (j >= 0)
229
+ {
230
+ atomicAdd(&Ev[j * 6 + r], slice[r] * v);
231
+ }
232
+ }
233
+ }
234
+ }
235
+
236
+ torch::Tensor EfficentE::computeEv(const int N, const torch::Tensor &vec) const
237
+ {
238
+ torch::Tensor Ev = torch::zeros({6 * N}, mdtype);
239
+ const auto tmp_vec = vec.view({-1});
240
+
241
+ Ev_kernel<<<NUM_BLOCKS(E_lookup.size(0) * ppf), NUM_THREADS>>>(
242
+ Ev.packed_accessor32<mtype, 1, torch::RestrictPtrTraits>(),
243
+ E_lookup.packed_accessor32<mtype, 3, torch::RestrictPtrTraits>(),
244
+ tmp_vec.packed_accessor32<mtype, 1, torch::RestrictPtrTraits>(),
245
+ block_index_tensor.packed_accessor32<long, 2, torch::RestrictPtrTraits>(),
246
+ patch_to_ku.packed_accessor32<long, 2, torch::RestrictPtrTraits>(),
247
+ t0, ppf);
248
+
249
+ Ev = Ev.view({-1, 1});
250
+ return Ev;
251
+ }
252
+
253
+ __global__ void Etv_kernel(
254
+ torch::PackedTensorAccessor32<mtype, 1, torch::RestrictPtrTraits> Etv,
255
+ const torch::PackedTensorAccessor32<mtype, 3, torch::RestrictPtrTraits> E_lookup,
256
+ const torch::PackedTensorAccessor32<mtype, 1, torch::RestrictPtrTraits> vec,
257
+ const torch::PackedTensorAccessor32<long, 2, torch::RestrictPtrTraits> index_tensor,
258
+ const torch::PackedTensorAccessor32<long, 2, torch::RestrictPtrTraits> patch_to_ku, const int t0, const int ppf)
259
+ {
260
+
261
+ GPU_1D_KERNEL_LOOP(n, index_tensor.size(0) * ppf)
262
+ {
263
+
264
+ int k = n % ppf; // src patch
265
+ int idx = n / ppf;
266
+ int i = index_tensor[idx][0];
267
+ int j = index_tensor[idx][1];
268
+
269
+ auto slice = E_lookup[idx][k]; // 6
270
+ long q_idx = patch_to_ku[i][k];
271
+
272
+ j = j - t0; // i not used anymore
273
+
274
+ for (int r = 0; r < 6; r++)
275
+ {
276
+ if (j >= 0)
277
+ {
278
+ float dp = slice[r] * vec[j * 6 + r];
279
+ atomicAdd(&Etv[q_idx], dp);
280
+ }
281
+ }
282
+ }
283
+ }
284
+
285
+ torch::Tensor EfficentE::computeEtv(const int M, const torch::Tensor &vec) const
286
+ {
287
+ torch::Tensor Etv = torch::zeros({M}, mdtype);
288
+ const auto tmp_vec = vec.view({-1});
289
+
290
+ Etv_kernel<<<NUM_BLOCKS(E_lookup.size(0) * ppf), NUM_THREADS>>>(
291
+ Etv.packed_accessor32<mtype, 1, torch::RestrictPtrTraits>(),
292
+ E_lookup.packed_accessor32<mtype, 3, torch::RestrictPtrTraits>(),
293
+ tmp_vec.packed_accessor32<mtype, 1, torch::RestrictPtrTraits>(),
294
+ block_index_tensor.packed_accessor32<long, 2, torch::RestrictPtrTraits>(),
295
+ patch_to_ku.packed_accessor32<long, 2, torch::RestrictPtrTraits>(),
296
+ t0, ppf);
297
+
298
+ Etv = Etv.view({-1, 1});
299
+ return Etv;
300
+ }
third-party/DPVO/dpvo/fastba/block_e.cuh ADDED
@@ -0,0 +1,26 @@
+ #include <ATen/ATen.h>
+ #include <ATen/NativeFunctions.h>
+ #include <ATen/Parallel.h>
+
+ const auto mdtype = torch::dtype(torch::kFloat32).device(torch::kCUDA);
+
+ typedef float mtype;
+
+ class EfficentE
+ {
+ private:
+     torch::Tensor block_index_tensor, index_tensor, patch_to_ku;
+     const int t0;
+
+ public:
+     const int ppf;
+     torch::Tensor E_lookup, ij_xself;
+
+     EfficentE(const torch::Tensor &ii, const torch::Tensor &jj, const torch::Tensor &ku, const int patches_per_frame, const int t0);
+
+     EfficentE();
+
+     torch::Tensor computeEQEt(const int N, const torch::Tensor &Q) const;
+     torch::Tensor computeEv(const int N, const torch::Tensor &vec) const;
+     torch::Tensor computeEtv(const int M, const torch::Tensor &vec) const;
+ };
third-party/DPVO/dpvo/lietorch/__init__.py ADDED
@@ -0,0 +1,2 @@
+ __all__ = ['groups']
+ from .groups import LieGroupParameter, SO3, RxSO3, SE3, Sim3, cat, stack
third-party/DPVO/dpvo/lietorch/broadcasting.py ADDED
@@ -0,0 +1,31 @@
+ import torch
+ import numpy as np
+
+ def check_broadcastable(x, y):
+     assert len(x.shape) == len(y.shape)
+     for (n, m) in zip(x.shape[:-1], y.shape[:-1]):
+         assert n==m or n==1 or m==1
+
+ def broadcast_inputs(x, y):
+     """ Automatic broadcasting of missing dimensions """
+     if y is None:
+         xs, xd = x.shape[:-1], x.shape[-1]
+         return (x.view(-1, xd).contiguous(), ), x.shape[:-1]
+
+     check_broadcastable(x, y)
+
+     xs, xd = x.shape[:-1], x.shape[-1]
+     ys, yd = y.shape[:-1], y.shape[-1]
+     out_shape = [max(n,m) for (n,m) in zip(xs,ys)]
+
+     # batch shapes match exactly: no expansion needed
+     if x.shape[:-1] == y.shape[:-1]:
+         x1 = x.view(-1, xd)
+         y1 = y.view(-1, yd)
+
+     else:
+         x_expand = [m if n==1 else 1 for (n,m) in zip(xs, ys)]
+         y_expand = [n if m==1 else 1 for (n,m) in zip(xs, ys)]
+         x1 = x.repeat(x_expand + [1]).reshape(-1, xd).contiguous()
+         y1 = y.repeat(y_expand + [1]).reshape(-1, yd).contiguous()
+
+     return (x1, y1), tuple(out_shape)
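A small illustration of what broadcast_inputs produces (hypothetical shapes; the module path in the import is an assumption): singleton batch dimensions are expanded so that both operands can be flattened to matching (num_elements, dim) views for the C++/CUDA group ops.

import torch
from dpvo.lietorch.broadcasting import broadcast_inputs   # assumed module path

x = torch.randn(4, 1, 7)    # 7-dim elements, batch shape (4, 1)
y = torch.randn(1, 5, 7)    # 7-dim elements, batch shape (1, 5)

(x1, y1), out_shape = broadcast_inputs(x, y)
print(x1.shape, y1.shape, out_shape)   # torch.Size([20, 7]) torch.Size([20, 7]) (4, 5)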
third-party/DPVO/dpvo/lietorch/gradcheck.py ADDED
@@ -0,0 +1,592 @@
1
+ import torch
2
+
3
+ TORCH_MAJOR = int(torch.__version__.split('.')[0])
4
+ TORCH_MINOR = int(torch.__version__.split('.')[1])
5
+
6
+ from torch.types import _TensorOrTensors
7
+ if TORCH_MAJOR == 1 and TORCH_MINOR < 8:
8
+ from torch._six import container_abcs, istuple
9
+ else:
10
+ import collections.abc as container_abcs
11
+
12
+ import torch.testing
13
+ from torch.overrides import is_tensor_like
14
+ from itertools import product
15
+ import warnings
16
+ from typing import Callable, Union, Optional, Iterable, List
17
+
18
+ def zero_gradients(x):
19
+ if isinstance(x, torch.Tensor):
20
+ if x.grad is not None:
21
+ x.grad.detach_()
22
+ x.grad.zero_()
23
+ elif isinstance(x, container_abcs.Iterable):
24
+ for elem in x:
25
+ zero_gradients(elem)
26
+
27
+
28
+ def make_jacobian(input, num_out):
29
+ if is_tensor_like(input):
30
+ if not input.is_floating_point() and not input.is_complex():
31
+ return None
32
+ if not input.requires_grad:
33
+ return None
34
+ return input.new_zeros((input.nelement(), num_out), dtype=input.dtype, layout=torch.strided)
35
+ elif isinstance(input, container_abcs.Iterable) and not isinstance(input, str):
36
+ jacobians = list(filter(
37
+ lambda x: x is not None, (make_jacobian(elem, num_out) for elem in input)))
38
+ if not jacobians:
39
+ return None
40
+ return type(input)(jacobians) # type: ignore
41
+ else:
42
+ return None
43
+
44
+
45
+ def iter_tensors(x: Union[torch.Tensor, Iterable[torch.Tensor]], only_requiring_grad: bool = False) -> Iterable[torch.Tensor]:
46
+ if is_tensor_like(x):
47
+ # mypy doesn't narrow type of `x` to torch.Tensor
48
+ if x.requires_grad or not only_requiring_grad: # type: ignore
49
+ yield x # type: ignore
50
+ elif isinstance(x, container_abcs.Iterable) and not isinstance(x, str):
51
+ for elem in x:
52
+ for result in iter_tensors(elem, only_requiring_grad):
53
+ yield result
54
+
55
+ def get_numerical_jacobian(fn, input, target=None, eps=1e-3, grad_out=1.0):
56
+ """
57
+ input: input to `fn`
58
+ target: the Tensors wrt whom Jacobians are calculated (default=`input`)
59
+ grad_out: grad output value used to calculate gradients.
60
+
61
+ Note that `target` may not even be part of `input` to `fn`, so please be
62
+ **very careful** in this to not clone `target`.
63
+ """
64
+ if target is None:
65
+ target = input
66
+ output_size = fn(input).numel()
67
+ jacobian = make_jacobian(target, output_size)
68
+
69
+ # It's much easier to iterate over flattened lists of tensors.
70
+ # These are reference to the same objects in jacobian, so any changes
71
+ # will be reflected in it as well.
72
+ x_tensors = iter_tensors(target, True)
73
+ j_tensors = iter_tensors(jacobian)
74
+
75
+ def update_jacobians(x, idx, d, d_idx, is_mkldnn=False):
76
+
77
+ # compute_jacobian only works for pure real
78
+ # or pure imaginary delta
79
+ def compute_gradient(delta):
80
+ # we currently assume that the norm of delta equals eps
81
+ assert(delta == eps or delta == (eps * 1j))
82
+
83
+ def fn_out():
84
+ if not is_mkldnn:
85
+ # x is a view into input and so this works
86
+ return fn(input).clone()
87
+ else:
88
+ # convert the dense tensor back to have mkldnn layout
89
+ return fn([x.to_mkldnn()])
90
+
91
+ orig = x[idx].item()
92
+ x[idx] = orig - delta
93
+ outa = fn_out()
94
+ x[idx] = orig + delta
95
+ outb = fn_out()
96
+ x[idx] = orig
97
+ r = (outb - outa) / (2 * eps)
98
+ return r.detach().reshape(-1)
99
+
100
+ # for details on the algorithm used here, refer:
101
+ # Section 3.5.3 https://arxiv.org/pdf/1701.00392.pdf
102
+ # s = fn(z) where z = x for real valued input
103
+ # and z = x + yj for complex valued input
104
+ ds_dx = compute_gradient(eps)
105
+ if x.is_complex(): # C -> C, C -> R
106
+ ds_dy = compute_gradient(eps * 1j)
107
+ # conjugate wirtinger derivative
108
+ conj_w_d = 0.5 * (ds_dx + ds_dy * 1j)
109
+ # wirtinger derivative
110
+ w_d = 0.5 * (ds_dx - ds_dy * 1j)
111
+ d[d_idx] = grad_out.conjugate() * conj_w_d + grad_out * w_d.conj()
112
+ elif ds_dx.is_complex(): # R -> C
113
+ # w_d = conj_w_d = 0.5 * ds_dx
114
+ # dL_dz_conj = 0.5 * [grad_out.conj() * ds_dx + grad_out * ds_dx.conj()]
115
+ # = 0.5 * [grad_out.conj() * ds_dx + (grad_out.conj() * ds_dx).conj()]
116
+ # = 0.5 * 2 * real(grad_out.conj() * ds_dx)
117
+ # = real(grad_out.conj() * ds_dx)
118
+ d[d_idx] = torch.real(grad_out.conjugate() * ds_dx)
119
+ else: # R -> R
120
+ d[d_idx] = ds_dx * grad_out
121
+
122
+ # TODO: compare structure
123
+ for x_tensor, d_tensor in zip(x_tensors, j_tensors):
124
+ if x_tensor.is_sparse:
125
+ def get_stride(size):
126
+ dim = len(size)
127
+ tmp = 1
128
+ stride = [0] * dim
129
+ for i in reversed(range(dim)):
130
+ stride[i] = tmp
131
+ tmp *= size[i]
132
+ return stride
133
+
134
+ x_nnz = x_tensor._nnz()
135
+ x_size = list(x_tensor.size())
136
+ x_indices = x_tensor._indices().t()
137
+ x_values = x_tensor._values()
138
+ x_stride = get_stride(x_size)
139
+
140
+ # Use .data here to get around the version check
141
+ x_values = x_values.data
142
+
143
+ for i in range(x_nnz):
144
+ x_value = x_values[i]
145
+ for x_idx in product(*[range(m) for m in x_values.size()[1:]]):
146
+ indices = x_indices[i].tolist() + list(x_idx)
147
+ d_idx = sum(indices[k] * x_stride[k] for k in range(len(x_size)))
148
+ update_jacobians(x_value, x_idx, d_tensor, d_idx)
149
+ elif x_tensor.layout == torch._mkldnn: # type: ignore
150
+ # Use .data here to get around the version check
151
+ x_tensor = x_tensor.data
152
+ if len(input) != 1:
153
+ raise ValueError('gradcheck currently only supports functions with 1 input, but got: ',
154
+ len(input))
155
+ for d_idx, x_idx in enumerate(product(*[range(m) for m in x_tensor.size()])):
156
+ # this is really inefficient, but without indexing implemented, there's
157
+ # not really a better way than converting back and forth
158
+ x_tensor_dense = x_tensor.to_dense()
159
+ update_jacobians(x_tensor_dense, x_idx, d_tensor, d_idx, is_mkldnn=True)
160
+ else:
161
+ # Use .data here to get around the version check
162
+ x_tensor = x_tensor.data
163
+ for d_idx, x_idx in enumerate(product(*[range(m) for m in x_tensor.size()])):
164
+ update_jacobians(x_tensor, x_idx, d_tensor, d_idx)
165
+
166
+ return jacobian
167
+
168
+
169
+ def get_analytical_jacobian(input, output, nondet_tol=0.0, grad_out=1.0):
170
+ # it is easier to call to_dense() on the sparse output than
171
+ # to modify analytical jacobian
172
+ if output.is_sparse:
173
+ raise ValueError('Sparse output is not supported at gradcheck yet. '
174
+ 'Please call to_dense() on the output of fn for gradcheck.')
175
+ if output.layout == torch._mkldnn: # type: ignore
176
+ raise ValueError('MKLDNN output is not supported at gradcheck yet. '
177
+ 'Please call to_dense() on the output of fn for gradcheck.')
178
+ diff_input_list = list(iter_tensors(input, True))
179
+ jacobian = make_jacobian(input, output.numel())
180
+ jacobian_reentrant = make_jacobian(input, output.numel())
181
+ grad_output = torch.zeros_like(output, memory_format=torch.legacy_contiguous_format)
182
+ flat_grad_output = grad_output.view(-1)
183
+ reentrant = True
184
+ correct_grad_sizes = True
185
+ correct_grad_types = True
186
+
187
+ for i in range(flat_grad_output.numel()):
188
+ flat_grad_output.zero_()
189
+ flat_grad_output[i] = grad_out
190
+ for jacobian_c in (jacobian, jacobian_reentrant):
191
+ grads_input = torch.autograd.grad(output, diff_input_list, grad_output,
192
+ retain_graph=True, allow_unused=True)
193
+ for jacobian_x, d_x, x in zip(jacobian_c, grads_input, diff_input_list):
194
+ if d_x is not None and d_x.size() != x.size():
195
+ correct_grad_sizes = False
196
+ elif d_x is not None and d_x.dtype != x.dtype:
197
+ correct_grad_types = False
198
+ elif jacobian_x.numel() != 0:
199
+ if d_x is None:
200
+ jacobian_x[:, i].zero_()
201
+ else:
202
+ d_x_dense = d_x.to_dense() if not d_x.layout == torch.strided else d_x
203
+ assert jacobian_x[:, i].numel() == d_x_dense.numel()
204
+ jacobian_x[:, i] = d_x_dense.contiguous().view(-1)
205
+
206
+ for jacobian_x, jacobian_reentrant_x in zip(jacobian, jacobian_reentrant):
207
+ if jacobian_x.numel() != 0 and (jacobian_x - jacobian_reentrant_x).abs().max() > nondet_tol:
208
+ reentrant = False
209
+
210
+ return jacobian, reentrant, correct_grad_sizes, correct_grad_types
211
+
212
+
213
+ def _as_tuple(x):
214
+ if TORCH_MAJOR == 1 and TORCH_MINOR < 8:
215
+ b_tuple = istuple(x)
216
+ else:
217
+ b_tuple = isinstance(x, tuple)
218
+
219
+ if b_tuple:
220
+ return x
221
+ elif isinstance(x, list):
222
+ return tuple(x)
223
+ else:
224
+ return x,
225
+
226
+
227
+
228
+ def _differentiable_outputs(x):
229
+ return tuple(o for o in _as_tuple(x) if o.requires_grad)
230
+
231
+
232
+ # Note [VarArg of Tensors]
233
+ # ~~~~~~~~~~~~~~~~~~~~~~~~
234
+ # 'func' accepts a vararg of tensors, which isn't expressable in the type system at the moment.
235
+ # If https://mypy.readthedocs.io/en/latest/additional_features.html?highlight=callable#extended-callable-types is accepted,
236
+ # the '...' first argument of Callable can be replaced with VarArg(Tensor).
237
+ # For now, we permit any input.
238
+ # the '...' first argument of Callable can be replaced with VarArg(Tensor).
239
+ # For now, we permit any input.
240
+
241
+ def gradcheck(
242
+ func: Callable[..., Union[_TensorOrTensors]], # See Note [VarArg of Tensors]
243
+ inputs: _TensorOrTensors,
244
+ eps: float = 1e-6,
245
+ atol: float = 1e-5,
246
+ rtol: float = 1e-3,
247
+ raise_exception: bool = True,
248
+ check_sparse_nnz: bool = False,
249
+ nondet_tol: float = 0.0,
250
+ check_undefined_grad: bool = True,
251
+ check_grad_dtypes: bool = False
252
+ ) -> bool:
253
+ r"""Check gradients computed via small finite differences against analytical
254
+ gradients w.r.t. tensors in :attr:`inputs` that are of floating point or complex type
255
+ and with ``requires_grad=True``.
256
+
257
+ The check between numerical and analytical gradients uses :func:`~torch.allclose`.
258
+
259
+ For complex functions, no notion of Jacobian exists. Gradcheck verifies if the numerical and
260
+ analytical values of Wirtinger and Conjugate Wirtinger derivative are consistent. The gradient
261
+ computation is done under the assumption that the overall function has a real valued output.
262
+ For functions with complex output, gradcheck compares the numerical and analytical gradients
263
+ for two values of :attr:`grad_output`: 1 and 1j. For more details, check out
264
+ :ref:`complex_autograd-doc`.
265
+
266
+ .. note::
267
+ The default values are designed for :attr:`input` of double precision.
268
+ This check will likely fail if :attr:`input` is of less precision, e.g.,
269
+ ``FloatTensor``.
270
+
271
+ .. warning::
272
+ If any checked tensor in :attr:`input` has overlapping memory, i.e.,
273
+ different indices pointing to the same memory address (e.g., from
274
+ :func:`torch.expand`), this check will likely fail because the numerical
275
+ gradients computed by point perturbation at such indices will change
276
+ values at all other indices that share the same memory address.
277
+
278
+ Args:
279
+ func (function): a Python function that takes Tensor inputs and returns
280
+ a Tensor or a tuple of Tensors
281
+ inputs (tuple of Tensor or Tensor): inputs to the function
282
+ eps (float, optional): perturbation for finite differences
283
+ atol (float, optional): absolute tolerance
284
+ rtol (float, optional): relative tolerance
285
+ raise_exception (bool, optional): indicating whether to raise an exception if
286
+ the check fails. The exception gives more information about the
287
+ exact nature of the failure. This is helpful when debugging gradchecks.
288
+ check_sparse_nnz (bool, optional): if True, gradcheck allows for SparseTensor input,
289
+ and for any SparseTensor at input, gradcheck will perform check at nnz positions only.
290
+ nondet_tol (float, optional): tolerance for non-determinism. When running
291
+ identical inputs through the differentiation, the results must either match
292
+ exactly (default, 0.0) or be within this tolerance.
293
+ check_undefined_grad (bool, options): if True, check if undefined output grads
294
+ are supported and treated as zeros, for ``Tensor`` outputs.
295
+
296
+ Returns:
297
+ True if all differences satisfy allclose condition
298
+ """
299
+ def fail_test(msg):
300
+ if raise_exception:
301
+ raise RuntimeError(msg)
302
+ return False
303
+
304
+ tupled_inputs = _as_tuple(inputs)
305
+ if not check_sparse_nnz and any(t.is_sparse for t in tupled_inputs if isinstance(t, torch.Tensor)):
306
+ return fail_test('gradcheck expects all tensor inputs are dense when check_sparse_nnz is set to False.')
307
+
308
+ # Make sure that gradients are saved for at least one input
309
+ any_input_requiring_grad = False
310
+ for idx, inp in enumerate(tupled_inputs):
311
+ if is_tensor_like(inp) and inp.requires_grad:
312
+ if not (inp.dtype == torch.float64 or inp.dtype == torch.complex128):
313
+ warnings.warn(
314
+ f'Input #{idx} requires gradient and '
315
+ 'is not a double precision floating point or complex. '
316
+ 'This check will likely fail if all the inputs are '
317
+ 'not of double precision floating point or complex. ')
318
+ content = inp._values() if inp.is_sparse else inp
319
+ # TODO: To cover more problematic cases, replace stride = 0 check with
320
+ # "any overlap in memory" once we have a proper function to check it.
321
+ if content.layout is not torch._mkldnn: # type: ignore
322
+ if not all(st > 0 or sz <= 1 for st, sz in zip(content.stride(), content.size())):
323
+ raise RuntimeError(
324
+ 'The {}th input has a dimension with stride 0. gradcheck only '
325
+ 'supports inputs that are non-overlapping to be able to '
326
+ 'compute the numerical gradients correctly. You should call '
327
+ '.contiguous on the input before passing it to gradcheck.')
328
+ any_input_requiring_grad = True
329
+ inp.retain_grad()
330
+ if not any_input_requiring_grad:
331
+ raise ValueError(
332
+ 'gradcheck expects at least one input tensor to require gradient, '
333
+ 'but none of them have requires_grad=True.')
334
+
335
+ func_out = func(*tupled_inputs)
336
+ output = _differentiable_outputs(func_out)
337
+
338
+ if not output:
339
+ for i, o in enumerate(func_out):
340
+ def fn(input):
341
+ return _as_tuple(func(*input))[i]
342
+ numerical = get_numerical_jacobian(fn, tupled_inputs, eps=eps)
343
+ for n in numerical:
344
+ if torch.ne(n, 0).sum() > 0:
345
+ return fail_test('Numerical gradient for function expected to be zero')
346
+ return True
347
+
348
+ for i, o in enumerate(output):
349
+ if not o.requires_grad:
350
+ continue
351
+
352
+ def fn(input):
353
+ return _as_tuple(func(*input))[i]
354
+
355
+ analytical, reentrant, correct_grad_sizes, correct_grad_types = get_analytical_jacobian(tupled_inputs,
356
+ o,
357
+ nondet_tol=nondet_tol)
358
+ numerical = get_numerical_jacobian(fn, tupled_inputs, eps=eps)
359
+
360
+ return analytical, numerical
361
+
362
+ out_is_complex = o.is_complex()
363
+
364
+ if out_is_complex:
365
+ # analytical vjp with grad_out = 1.0j
366
+ analytical_with_imag_grad_out, reentrant_with_imag_grad_out, \
367
+ correct_grad_sizes_with_imag_grad_out, correct_grad_types_with_imag_grad_out \
368
+ = get_analytical_jacobian(tupled_inputs, o, nondet_tol=nondet_tol, grad_out=1j)
369
+ numerical_with_imag_grad_out = get_numerical_jacobian(fn, tupled_inputs, eps=eps, grad_out=1j)
370
+
371
+ if not correct_grad_types and check_grad_dtypes:
372
+ return fail_test('Gradient has dtype mismatch')
373
+
374
+ if out_is_complex and not correct_grad_types_with_imag_grad_out and check_grad_dtypes:
375
+ return fail_test('Gradient (calculated using complex valued grad output) has dtype mismatch')
376
+
377
+ if not correct_grad_sizes:
378
+ return fail_test('Analytical gradient has incorrect size')
379
+
380
+ if out_is_complex and not correct_grad_sizes_with_imag_grad_out:
381
+ return fail_test('Analytical gradient (calculated using complex valued grad output) has incorrect size')
382
+
383
+ def checkIfNumericalAnalyticAreClose(a, n, j, error_str=''):
384
+ if not torch.allclose(a, n, rtol, atol):
385
+ return fail_test(error_str + 'Jacobian mismatch for output %d with respect to input %d,\n'
386
+ 'numerical:%s\nanalytical:%s\n' % (i, j, n, a))
387
+
388
+ inp_tensors = iter_tensors(tupled_inputs, True)
389
+
390
+ for j, (a, n, inp) in enumerate(zip(analytical, numerical, inp_tensors)):
391
+ if a.numel() != 0 or n.numel() != 0:
392
+ if o.is_complex():
393
+ # C -> C, R -> C
394
+ a_with_imag_grad_out = analytical_with_imag_grad_out[j]
395
+ n_with_imag_grad_out = numerical_with_imag_grad_out[j]
396
+ checkIfNumericalAnalyticAreClose(a_with_imag_grad_out, n_with_imag_grad_out, j,
397
+ "Gradients failed to compare equal for grad output = 1j. ")
398
+ if inp.is_complex():
399
+ # C -> R, C -> C
400
+ checkIfNumericalAnalyticAreClose(a, n, j,
401
+ "Gradients failed to compare equal for grad output = 1. ")
402
+ else:
403
+ # R -> R, R -> C
404
+ checkIfNumericalAnalyticAreClose(a, n, j)
405
+
406
+
407
+ def not_reentrant_error(error_str=''):
408
+ error_msg = "Backward" + error_str + " is not reentrant, i.e., running backward with same \
409
+ input and grad_output multiple times gives different values, \
410
+ although analytical gradient matches numerical gradient. \
411
+ The tolerance for nondeterminism was {}.".format(nondet_tol)
412
+ return fail_test(error_msg)
413
+
414
+ if not reentrant:
415
+ return not_reentrant_error()
416
+
417
+ if out_is_complex and not reentrant_with_imag_grad_out:
418
+ return not_reentrant_error(' (calculated using complex valued grad output)')
419
+
420
+ # check if the backward multiplies by grad_output
421
+ output = _differentiable_outputs(func(*tupled_inputs))
422
+ if any([o.requires_grad for o in output]):
423
+ diff_input_list: List[torch.Tensor] = list(iter_tensors(tupled_inputs, True))
424
+ if not diff_input_list:
425
+ raise RuntimeError("no Tensors requiring grad found in input")
426
+ grads_input = torch.autograd.grad(output, diff_input_list,
427
+ [torch.zeros_like(o, memory_format=torch.legacy_contiguous_format) for o in output],
428
+ allow_unused=True)
429
+ for gi, di in zip(grads_input, diff_input_list):
430
+ if gi is None:
431
+ continue
432
+ if isinstance(gi, torch.Tensor) and gi.layout != torch.strided:
433
+ if gi.layout != di.layout:
434
+ return fail_test('grad is incorrect layout (' + str(gi.layout) + ' is not ' + str(di.layout) + ')')
435
+ if gi.layout == torch.sparse_coo:
436
+ if gi.sparse_dim() != di.sparse_dim():
437
+ return fail_test('grad is sparse tensor, but has incorrect sparse_dim')
438
+ if gi.dense_dim() != di.dense_dim():
439
+ return fail_test('grad is sparse tensor, but has incorrect dense_dim')
440
+ gi = gi.to_dense()
441
+ di = di.to_dense()
442
+ if not gi.eq(0).all():
443
+ return fail_test('backward not multiplied by grad_output')
444
+ if gi.dtype != di.dtype or gi.device != di.device or gi.is_sparse != di.is_sparse:
445
+ return fail_test("grad is incorrect type")
446
+ if gi.size() != di.size():
447
+ return fail_test('grad is incorrect size')
448
+
449
+ if check_undefined_grad:
450
+ def warn_bc_breaking():
451
+ warnings.warn((
452
+ 'Backwards compatibility: New undefined gradient support checking '
453
+ 'feature is enabled by default, but it may break existing callers '
454
+ 'of this function. If this is true for you, you can call this '
455
+ 'function with "check_undefined_grad=False" to disable the feature'))
456
+
457
+ def check_undefined_grad_support(output_to_check):
458
+ grads_output = [torch.zeros_like(o, memory_format=torch.legacy_contiguous_format) for o in output_to_check]
459
+ try:
460
+ grads_input = torch.autograd.grad(output_to_check,
461
+ diff_input_list,
462
+ grads_output,
463
+ allow_unused=True)
464
+ except RuntimeError:
465
+ warn_bc_breaking()
466
+ return fail_test((
467
+ 'Expected backward function to handle undefined output grads. '
468
+ 'Please look at "Notes about undefined output gradients" in '
469
+ '"tools/autograd/derivatives.yaml"'))
470
+
471
+ for gi, i in zip(grads_input, diff_input_list):
472
+ if (gi is not None) and (not gi.eq(0).all()):
473
+ warn_bc_breaking()
474
+ return fail_test((
475
+ 'Expected all input grads to be undefined or zero when all output grads are undefined '
476
+ 'or zero. Please look at "Notes about undefined output gradients" in '
477
+ '"tools/autograd/derivatives.yaml"'))
478
+ return True
479
+
480
+ # All backward functions must work properly if all output grads are undefined
481
+ outputs_to_check = [[
482
+ torch._C._functions.UndefinedGrad()(o) for o in _differentiable_outputs(func(*tupled_inputs))
483
+ # This check filters out Tensor-likes that aren't instances of Tensor.
484
+ if isinstance(o, torch.Tensor)
485
+ ]]
486
+
487
+ # If there are multiple output grads, we should be able to undef one at a time without error
488
+ if len(outputs_to_check[0]) > 1:
489
+ for undef_grad_idx in range(len(output)):
490
+ output_to_check = _differentiable_outputs(func(*tupled_inputs))
491
+ outputs_to_check.append([
492
+ torch._C._functions.UndefinedGrad()(o) if idx == undef_grad_idx else o
493
+ for idx, o in enumerate(output_to_check)])
494
+
495
+ for output_to_check in outputs_to_check:
496
+ if not check_undefined_grad_support(output_to_check):
497
+ return False
498
+
499
+ return True
500
+
501
+
502
+ def gradgradcheck(
503
+ func: Callable[..., _TensorOrTensors], # See Note [VarArg of Tensors]
504
+ inputs: _TensorOrTensors,
505
+ grad_outputs: Optional[_TensorOrTensors] = None,
506
+ eps: float = 1e-6,
507
+ atol: float = 1e-5,
508
+ rtol: float = 1e-3,
509
+ gen_non_contig_grad_outputs: bool = False,
510
+ raise_exception: bool = True,
511
+ nondet_tol: float = 0.0,
512
+ check_undefined_grad: bool = True,
513
+ check_grad_dtypes: bool = False
514
+ ) -> bool:
515
+ r"""Check gradients of gradients computed via small finite differences
516
+ against analytical gradients w.r.t. tensors in :attr:`inputs` and
517
+ :attr:`grad_outputs` that are of floating point or complex type and with
518
+ ``requires_grad=True``.
519
+
520
+ This function checks that backpropagating through the gradients computed
521
+ to the given :attr:`grad_outputs` is correct.
522
+
523
+ The check between numerical and analytical gradients uses :func:`~torch.allclose`.
524
+
525
+ .. note::
526
+ The default values are designed for :attr:`input` and
527
+ :attr:`grad_outputs` of double precision. This check will likely fail if
528
+ they are of less precision, e.g., ``FloatTensor``.
529
+
530
+ .. warning::
531
+ If any checked tensor in :attr:`input` and :attr:`grad_outputs` has
532
+ overlapping memory, i.e., different indices pointing to the same memory
533
+ address (e.g., from :func:`torch.expand`), this check will likely fail
534
+ because the numerical gradients computed by point perturbation at such
535
+ indices will change values at all other indices that share the same
536
+ memory address.
537
+
538
+ Args:
539
+ func (function): a Python function that takes Tensor inputs and returns
540
+ a Tensor or a tuple of Tensors
541
+ inputs (tuple of Tensor or Tensor): inputs to the function
542
+ grad_outputs (tuple of Tensor or Tensor, optional): The gradients with
543
+ respect to the function's outputs.
544
+ eps (float, optional): perturbation for finite differences
545
+ atol (float, optional): absolute tolerance
546
+ rtol (float, optional): relative tolerance
547
+ gen_non_contig_grad_outputs (bool, optional): if :attr:`grad_outputs` is
548
+ ``None`` and :attr:`gen_non_contig_grad_outputs` is ``True``, the
549
+ randomly generated gradient outputs are made to be noncontiguous
550
+ raise_exception (bool, optional): indicating whether to raise an exception if
551
+ the check fails. The exception gives more information about the
552
+ exact nature of the failure. This is helpful when debugging gradchecks.
553
+ nondet_tol (float, optional): tolerance for non-determinism. When running
554
+ identical inputs through the differentiation, the results must either match
555
+ exactly (default, 0.0) or be within this tolerance. Note that a small amount
556
+ of nondeterminism in the gradient will lead to larger inaccuracies in
557
+ the second derivative.
558
+ check_undefined_grad (bool, optional): if True, check if undefined output grads
559
+ are supported and treated as zeros
560
+
561
+ Returns:
562
+ True if all differences satisfy allclose condition
563
+ """
564
+ tupled_inputs = _as_tuple(inputs)
565
+
566
+ if grad_outputs is None:
567
+ # If grad_outputs is not specified, create random Tensors of the same
568
+ # shape, type, and device as the outputs
569
+ def randn_like(x):
570
+ y = torch.testing.randn_like(
571
+ x if (x.is_floating_point() or x.is_complex()) else x.double(), memory_format=torch.legacy_contiguous_format)
572
+ if gen_non_contig_grad_outputs:
573
+ y = torch.testing.make_non_contiguous(y)
574
+ return y.requires_grad_()
575
+ outputs = _as_tuple(func(*tupled_inputs))
576
+ tupled_grad_outputs = tuple(randn_like(x) for x in outputs)
577
+ else:
578
+ tupled_grad_outputs = _as_tuple(grad_outputs)
579
+
580
+ num_outputs = len(tupled_grad_outputs)
581
+
582
+ def new_func(*args):
583
+ input_args = args[:-num_outputs]
584
+ grad_outputs = args[-num_outputs:]
585
+ outputs = _differentiable_outputs(func(*input_args))
586
+ input_args = tuple(x for x in input_args if isinstance(x, torch.Tensor) and x.requires_grad)
587
+ grad_inputs = torch.autograd.grad(outputs, input_args, grad_outputs, create_graph=True)
588
+ return grad_inputs
589
+
590
+ return gradcheck(new_func, tupled_inputs + tupled_grad_outputs, eps, atol, rtol, raise_exception,
591
+ nondet_tol=nondet_tol, check_undefined_grad=check_undefined_grad,
592
+ check_grad_dtypes=check_grad_dtypes)
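For reference, a minimal usage sketch of the vendored gradcheck above. Note that this copy returns the (analytical, numerical) Jacobian lists for the first differentiable output (see the early `return analytical, numerical` above) rather than a bare bool, so the caller compares them directly. The import path and the toy function here are illustrative assumptions, not part of the commit.

import torch
from dpvo.lietorch.gradcheck import gradcheck

def f(x, y):
    # any differentiable toy function of double-precision tensor inputs
    return (x * y).sin().sum()

x = torch.randn(4, 3, dtype=torch.float64, requires_grad=True)
y = torch.randn(4, 3, dtype=torch.float64, requires_grad=True)

analytical, numerical = gradcheck(f, (x, y), eps=1e-6, atol=1e-5, rtol=1e-3)
for a, n in zip(analytical, numerical):
    assert torch.allclose(a, n, rtol=1e-3, atol=1e-5)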
third-party/DPVO/dpvo/lietorch/group_ops.py ADDED
@@ -0,0 +1,102 @@
1
+ import lietorch_backends
2
+ import torch
3
+ import torch.nn.functional as F
4
+
5
+
6
+
7
+ class GroupOp(torch.autograd.Function):
8
+ """ group operation base class """
9
+
10
+ @classmethod
11
+ def forward(cls, ctx, group_id, *inputs):
12
+ ctx.group_id = group_id
13
+ ctx.save_for_backward(*inputs)
14
+ out = cls.forward_op(ctx.group_id, *inputs)
15
+ return out
16
+
17
+ @classmethod
18
+ def backward(cls, ctx, grad):
19
+ error_str = "Backward operation not implemented for {}".format(cls)
20
+ assert cls.backward_op is not None, error_str
21
+
22
+ inputs = ctx.saved_tensors
23
+ grad = grad.contiguous()
24
+ grad_inputs = cls.backward_op(ctx.group_id, grad, *inputs)
25
+ return (None, ) + tuple(grad_inputs)
26
+
27
+
28
+ class Exp(GroupOp):
29
+ """ exponential map """
30
+ forward_op, backward_op = lietorch_backends.expm, lietorch_backends.expm_backward
31
+
32
+ class Log(GroupOp):
33
+ """ logarithm map """
34
+ forward_op, backward_op = lietorch_backends.logm, lietorch_backends.logm_backward
35
+
36
+ class Inv(GroupOp):
37
+ """ group inverse """
38
+ forward_op, backward_op = lietorch_backends.inv, lietorch_backends.inv_backward
39
+
40
+ class Mul(GroupOp):
41
+ """ group multiplication """
42
+ forward_op, backward_op = lietorch_backends.mul, lietorch_backends.mul_backward
43
+
44
+ class Adj(GroupOp):
45
+ """ adjoint operator """
46
+ forward_op, backward_op = lietorch_backends.adj, lietorch_backends.adj_backward
47
+
48
+ class AdjT(GroupOp):
49
+ """ adjoint operator """
50
+ forward_op, backward_op = lietorch_backends.adjT, lietorch_backends.adjT_backward
51
+
52
+ class Act3(GroupOp):
53
+ """ action on point """
54
+ forward_op, backward_op = lietorch_backends.act, lietorch_backends.act_backward
55
+
56
+ class Act4(GroupOp):
57
+ """ action on point """
58
+ forward_op, backward_op = lietorch_backends.act4, lietorch_backends.act4_backward
59
+
60
+ class Jinv(GroupOp):
61
+ """ adjoint operator """
62
+ forward_op, backward_op = lietorch_backends.Jinv, None
63
+
64
+ class ToMatrix(GroupOp):
65
+ """ convert to matrix representation """
66
+ forward_op, backward_op = lietorch_backends.as_matrix, None
67
+
68
+
69
+
70
+
71
+ ### conversion operations to/from Euclidean embeddings ###
72
+
73
+ class FromVec(torch.autograd.Function):
74
+ """ convert vector into group object """
75
+
76
+ @classmethod
77
+ def forward(cls, ctx, group_id, *inputs):
78
+ ctx.group_id = group_id
79
+ ctx.save_for_backward(*inputs)
80
+ return inputs[0]
81
+
82
+ @classmethod
83
+ def backward(cls, ctx, grad):
84
+ inputs = ctx.saved_tensors
85
+ J = lietorch_backends.projector(ctx.group_id, *inputs)
86
+ return None, torch.matmul(grad.unsqueeze(-2), torch.linalg.pinv(J)).squeeze(-2)
87
+
88
+ class ToVec(torch.autograd.Function):
89
+ """ convert group object to vector """
90
+
91
+ @classmethod
92
+ def forward(cls, ctx, group_id, *inputs):
93
+ ctx.group_id = group_id
94
+ ctx.save_for_backward(*inputs)
95
+ return inputs[0]
96
+
97
+ @classmethod
98
+ def backward(cls, ctx, grad):
99
+ inputs = ctx.saved_tensors
100
+ J = lietorch_backends.projector(ctx.group_id, *inputs)
101
+ return None, torch.matmul(grad.unsqueeze(-2), J).squeeze(-2)
102
+
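These autograd Functions are thin wrappers over the compiled lietorch_backends extension and are normally reached through the LieGroup classes in groups.py (next file). A direct call looks roughly like the sketch below; the shapes follow the SE3 group_id / manifold / embedding dimensions declared in groups.py, and a built CUDA extension is assumed.

import torch
from dpvo.lietorch.group_ops import Exp, Log

SE3_GROUP_ID = 3                          # matches SE3.group_id in groups.py
xi = torch.randn(8, 6, device="cuda", requires_grad=True)   # se(3) tangent vectors

X = Exp.apply(SE3_GROUP_ID, xi)           # (8, 7): translation + unit quaternion
xi_back = Log.apply(SE3_GROUP_ID, X)      # round trip back to the tangent space
xi_back.sum().backward()                  # gradients flow through expm_backward / logm_backward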
third-party/DPVO/dpvo/lietorch/groups.py ADDED
@@ -0,0 +1,322 @@
1
+ import torch
2
+ import numpy as np
3
+
4
+ # group operations implemented in cuda
5
+ from .group_ops import Exp, Log, Inv, Mul, Adj, AdjT, Jinv, Act3, Act4, ToMatrix, ToVec, FromVec
6
+ from .broadcasting import broadcast_inputs
7
+
8
+
9
+ class LieGroupParameter(torch.Tensor):
10
+ """ Wrapper class for LieGroup """
11
+
12
+ from torch._C import _disabled_torch_function_impl
13
+ __torch_function__ = _disabled_torch_function_impl
14
+
15
+ def __new__(cls, group, requires_grad=True):
16
+ data = torch.zeros(group.tangent_shape,
17
+ device=group.data.device,
18
+ dtype=group.data.dtype,
19
+ requires_grad=True)
20
+
21
+ return torch.Tensor._make_subclass(cls, data, requires_grad)
22
+
23
+ def __init__(self, group):
24
+ self.group = group
25
+
26
+ def retr(self):
27
+ return self.group.retr(self)
28
+
29
+ def log(self):
30
+ return self.retr().log()
31
+
32
+ def inv(self):
33
+ return self.retr().inv()
34
+
35
+ def adj(self, a):
36
+ return self.retr().adj(a)
37
+
38
+ def __mul__(self, other):
39
+ if isinstance(other, LieGroupParameter):
40
+ return self.retr() * other.retr()
41
+ else:
42
+ return self.retr() * other
43
+
44
+ def add_(self, update, alpha):
45
+ self.group = self.group.exp(alpha*update) * self.group
46
+
47
+ def __getitem__(self, index):
48
+ return self.retr().__getitem__(index)
49
+
50
+
51
+ class LieGroup:
52
+ """ Base class for Lie Group """
53
+
54
+ def __init__(self, data):
55
+ self.data = data
56
+
57
+ def __repr__(self):
58
+ return "{}: size={}, device={}, dtype={}".format(
59
+ self.group_name, self.shape, self.device, self.dtype)
60
+
61
+ @property
62
+ def shape(self):
63
+ return self.data.shape[:-1]
64
+
65
+ @property
66
+ def device(self):
67
+ return self.data.device
68
+
69
+ @property
70
+ def dtype(self):
71
+ return self.data.dtype
72
+
73
+ def vec(self):
74
+ return self.apply_op(ToVec, self.data)
75
+
76
+ @property
77
+ def tangent_shape(self):
78
+ return self.data.shape[:-1] + (self.manifold_dim,)
79
+
80
+ @classmethod
81
+ def Identity(cls, *batch_shape, **kwargs):
82
+ """ Construct identity element with batch shape """
83
+
84
+ if isinstance(batch_shape[0], tuple):
85
+ batch_shape = batch_shape[0]
86
+
87
+ elif isinstance(batch_shape[0], list):
88
+ batch_shape = tuple(batch_shape[0])
89
+
90
+ numel = np.prod(batch_shape)
91
+ data = cls.id_elem.reshape(1,-1)
92
+
93
+ if 'device' in kwargs:
94
+ data = data.to(kwargs['device'])
95
+
96
+ if 'dtype' in kwargs:
97
+ data = data.type(kwargs['dtype'])
98
+
99
+ data = data.repeat(numel, 1)
100
+ return cls(data).view(batch_shape)
101
+
102
+ @classmethod
103
+ def IdentityLike(cls, G):
104
+ return cls.Identity(G.shape, device=G.data.device, dtype=G.data.dtype)
105
+
106
+ @classmethod
107
+ def InitFromVec(cls, data):
108
+ return cls(cls.apply_op(FromVec, data))
109
+
110
+ @classmethod
111
+ def Random(cls, *batch_shape, sigma=1.0, **kwargs):
112
+ """ Construct random element with batch_shape by random sampling in tangent space"""
113
+
114
+ if isinstance(batch_shape[0], tuple):
115
+ batch_shape = batch_shape[0]
116
+
117
+ elif isinstance(batch_shape[0], list):
118
+ batch_shape = tuple(batch_shape[0])
119
+
120
+ tangent_shape = batch_shape + (cls.manifold_dim,)
121
+ xi = torch.randn(tangent_shape, **kwargs)
122
+ return cls.exp(sigma * xi)
123
+
124
+ @classmethod
125
+ def apply_op(cls, op, x, y=None):
126
+ """ Apply group operator """
127
+ inputs, out_shape = broadcast_inputs(x, y)
128
+
129
+ data = op.apply(cls.group_id, *inputs)
130
+ return data.view(out_shape + (-1,))
131
+
132
+ @classmethod
133
+ def exp(cls, x):
134
+ """ exponential map: x -> X """
135
+ return cls(cls.apply_op(Exp, x))
136
+
137
+ def quaternion(self):
138
+ """ extract quaternion """
139
+ return self.apply_op(Quat, self.data)
140
+
141
+ def log(self):
142
+ """ logarithm map """
143
+ return self.apply_op(Log, self.data)
144
+
145
+ def inv(self):
146
+ """ group inverse """
147
+ return self.__class__(self.apply_op(Inv, self.data))
148
+
149
+ def mul(self, other):
150
+ """ group multiplication """
151
+ return self.__class__(self.apply_op(Mul, self.data, other.data))
152
+
153
+ def retr(self, a):
154
+ """ retraction: Exp(a) * X """
155
+ dX = self.__class__.apply_op(Exp, a)
156
+ return self.__class__(self.apply_op(Mul, dX, self.data))
157
+
158
+ def adj(self, a):
159
+ """ adjoint operator: b = A(X) * a """
160
+ return self.apply_op(Adj, self.data, a)
161
+
162
+ def adjT(self, a):
163
+ """ transposed adjoint operator: b = a * A(X) """
164
+ return self.apply_op(AdjT, self.data, a)
165
+
166
+ def Jinv(self, a):
167
+ return self.apply_op(Jinv, self.data, a)
168
+
169
+ def act(self, p):
170
+ """ action on a point cloud """
171
+
172
+ # action on point
173
+ if p.shape[-1] == 3:
174
+ return self.apply_op(Act3, self.data, p)
175
+
176
+ # action on homogeneous point
177
+ elif p.shape[-1] == 4:
178
+ return self.apply_op(Act4, self.data, p)
179
+
180
+ def matrix(self):
181
+ """ convert element to 4x4 matrix """
182
+ I = torch.eye(4, dtype=self.dtype, device=self.device)
183
+ I = I.view([1] * (len(self.data.shape) - 1) + [4, 4])
184
+ return self.__class__(self.data[...,None,:]).act(I).transpose(-1,-2)
185
+
186
+ def translation(self):
187
+ """ extract translation component """
188
+ p = torch.as_tensor([0.0, 0.0, 0.0, 1.0], dtype=self.dtype, device=self.device)
189
+ p = p.view([1] * (len(self.data.shape) - 1) + [4,])
190
+ return self.apply_op(Act4, self.data, p)
191
+
192
+ def detach(self):
193
+ return self.__class__(self.data.detach())
194
+
195
+ def view(self, dims):
196
+ data_reshaped = self.data.view(dims + (self.embedded_dim,))
197
+ return self.__class__(data_reshaped)
198
+
199
+ def __mul__(self, other):
200
+ # group multiplication
201
+
202
+ if isinstance(other, LieGroup):
203
+ return self.mul(other)
204
+
205
+ # action on point
206
+ elif isinstance(other, torch.Tensor):
207
+ return self.act(other)
208
+
209
+ def __getitem__(self, index):
210
+ return self.__class__(self.data[index])
211
+
212
+ def __setitem__(self, index, item):
213
+ self.data[index] = item.data
214
+
215
+ def to(self, *args, **kwargs):
216
+ return self.__class__(self.data.to(*args, **kwargs))
217
+
218
+ def cpu(self):
219
+ return self.__class__(self.data.cpu())
220
+
221
+ def cuda(self):
222
+ return self.__class__(self.data.cuda())
223
+
224
+ def float(self, device):
225
+ return self.__class__(self.data.float())
226
+
227
+ def double(self, device):
228
+ return self.__class__(self.data.double())
229
+
230
+ def unbind(self, dim=0):
231
+ return [self.__class__(x) for x in self.data.unbind(dim=dim)]
232
+
233
+
234
+ class SO3(LieGroup):
235
+ group_name = 'SO3'
236
+ group_id = 1
237
+ manifold_dim = 3
238
+ embedded_dim = 4
239
+
240
+ # unit quaternion
241
+ id_elem = torch.as_tensor([0.0, 0.0, 0.0, 1.0])
242
+
243
+ def __init__(self, data):
244
+ if isinstance(data, SE3):
245
+ data = data.data[..., 3:7]
246
+
247
+ super(SO3, self).__init__(data)
248
+
249
+
250
+ class RxSO3(LieGroup):
251
+ group_name = 'RxSO3'
252
+ group_id = 2
253
+ manifold_dim = 4
254
+ embedded_dim = 5
255
+
256
+ # unit quaternion
257
+ id_elem = torch.as_tensor([0.0, 0.0, 0.0, 1.0, 1.0])
258
+
259
+ def __init__(self, data):
260
+ if isinstance(data, Sim3):
261
+ data = data.data[..., 3:8]
262
+
263
+ super(RxSO3, self).__init__(data)
264
+
265
+
266
+ class SE3(LieGroup):
267
+ group_name = 'SE3'
268
+ group_id = 3
269
+ manifold_dim = 6
270
+ embedded_dim = 7
271
+
272
+ # translation, unit quaternion
273
+ id_elem = torch.as_tensor([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0])
274
+
275
+ def __init__(self, data):
276
+ if isinstance(data, SO3):
277
+ translation = torch.zeros_like(data.data[...,:3])
278
+ data = torch.cat([translation, data.data], -1)
279
+
280
+ super(SE3, self).__init__(data)
281
+
282
+ def scale(self, s):
283
+ t, q = self.data.split([3,4], -1)
284
+ t = t * s.unsqueeze(-1)
285
+ return SE3(torch.cat([t, q], dim=-1))
286
+
287
+
288
+ class Sim3(LieGroup):
289
+ group_name = 'Sim3'
290
+ group_id = 4
291
+ manifold_dim = 7
292
+ embedded_dim = 8
293
+
294
+ # translation, unit quaternion, scale
295
+ id_elem = torch.as_tensor([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0])
296
+
297
+ def __init__(self, data):
298
+
299
+ if isinstance(data, SO3):
300
+ scale = torch.ones_like(data.data[...,:1])
301
+ translation = torch.zeros_like(data.data[...,:3])
302
+ data = torch.cat([translation, data.data, scale], -1)
303
+
304
+ elif isinstance(data, SE3):
305
+ scale = torch.ones_like(data.data[...,:1])
306
+ data = torch.cat([data.data, scale], -1)
307
+
308
+ elif isinstance(data, Sim3):
309
+ data = data.data
310
+
311
+ super(Sim3, self).__init__(data)
312
+
313
+
314
+ def cat(group_list, dim):
315
+ """ Concatenate groups along dimension """
316
+ data = torch.cat([X.data for X in group_list], dim=dim)
317
+ return group_list[0].__class__(data)
318
+
319
+ def stack(group_list, dim):
320
+ """ Concatenate groups along dimension """
321
+ data = torch.stack([X.data for X in group_list], dim=dim)
322
+ return group_list[0].__class__(data)
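A usage sketch for the SE3 class defined above (the other groups behave the same way); the device choice is illustrative.

import torch
from dpvo.lietorch.groups import SE3

T = SE3.Random(32, device="cuda")          # random poses, sampled in the tangent space
dx = torch.randn(32, 6, device="cuda")     # se(3) increments
T2 = SE3.exp(dx) * T                       # left-multiplicative update
points = torch.randn(32, 3, device="cuda")
transformed = T2 * points                  # __mul__ on a plain tensor dispatches to act()
rel = (T2 * T.inv()).log()                 # relative motion, back in the tangent space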
third-party/DPVO/dpvo/lietorch/include/common.h ADDED
@@ -0,0 +1,12 @@
1
+ #ifndef COMMON_H
2
+ #define COMMON_H
3
+
4
+ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
5
+ #define EIGEN_RUNTIME_NO_MALLOC
6
+
7
+ #define EPS 1e-6
8
+ #define PI 3.14159265358979323846
9
+
10
+
11
+ #endif
12
+
third-party/DPVO/dpvo/lietorch/include/dispatch.h ADDED
@@ -0,0 +1,48 @@
1
+ #ifndef DISPATCH_H
2
+ #define DISPATCH_H
3
+
4
+ #include <torch/extension.h>
5
+
6
+ #include "so3.h"
7
+ #include "rxso3.h"
8
+ #include "se3.h"
9
+ #include "sim3.h"
10
+
11
+
12
+ #define PRIVATE_CASE_TYPE(group_index, enum_type, type, ...) \
13
+ case enum_type: { \
14
+ using scalar_t = type; \
15
+ switch (group_index) { \
16
+ case 1: { \
17
+ using group_t = SO3<type>; \
18
+ return __VA_ARGS__(); \
19
+ } \
20
+ case 2: { \
21
+ using group_t = RxSO3<type>; \
22
+ return __VA_ARGS__(); \
23
+ } \
24
+ case 3: { \
25
+ using group_t = SE3<type>; \
26
+ return __VA_ARGS__(); \
27
+ } \
28
+ case 4: { \
29
+ using group_t = Sim3<type>; \
30
+ return __VA_ARGS__(); \
31
+ } \
32
+ } \
33
+ } \
34
+
35
+ #define DISPATCH_GROUP_AND_FLOATING_TYPES(GROUP_INDEX, TYPE, NAME, ...) \
36
+ [&] { \
37
+ const auto& the_type = TYPE; \
38
+ /* don't use TYPE again in case it is an expensive or side-effect op */ \
39
+ at::ScalarType _st = ::detail::scalar_type(the_type); \
40
+ switch (_st) { \
41
+ PRIVATE_CASE_TYPE(GROUP_INDEX, at::ScalarType::Double, double, __VA_ARGS__) \
42
+ PRIVATE_CASE_TYPE(GROUP_INDEX, at::ScalarType::Float, float, __VA_ARGS__) \
43
+ default: break; \
44
+ } \
45
+ }()
46
+
47
+ #endif
48
+
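The integer switched on by DISPATCH_GROUP_AND_FLOATING_TYPES is the same group_id carried by the Python wrappers in groups.py above; the snippet below only illustrates that correspondence.

from dpvo.lietorch.groups import SO3, RxSO3, SE3, Sim3

# case 1 -> SO3<scalar_t>, case 2 -> RxSO3<scalar_t>, case 3 -> SE3<scalar_t>, case 4 -> Sim3<scalar_t>
assert (SO3.group_id, RxSO3.group_id, SE3.group_id, Sim3.group_id) == (1, 2, 3, 4)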
third-party/DPVO/dpvo/lietorch/include/lietorch_cpu.h ADDED
@@ -0,0 +1,51 @@
1
+
2
+ #ifndef LIETORCH_CPU_H_
3
+ #define LIETORCH_CPU_H_
4
+
5
+ #include <vector>
6
+ #include <torch/extension.h>
7
+
8
+
9
+ // unary operations
10
+ torch::Tensor exp_forward_cpu(int, torch::Tensor);
11
+ std::vector<torch::Tensor> exp_backward_cpu(int, torch::Tensor, torch::Tensor);
12
+
13
+ torch::Tensor log_forward_cpu(int, torch::Tensor);
14
+ std::vector<torch::Tensor> log_backward_cpu(int, torch::Tensor, torch::Tensor);
15
+
16
+ torch::Tensor inv_forward_cpu(int, torch::Tensor);
17
+ std::vector<torch::Tensor> inv_backward_cpu(int, torch::Tensor, torch::Tensor);
18
+
19
+ // binary operations
20
+ torch::Tensor mul_forward_cpu(int, torch::Tensor, torch::Tensor);
21
+ std::vector<torch::Tensor> mul_backward_cpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
22
+
23
+ torch::Tensor adj_forward_cpu(int, torch::Tensor, torch::Tensor);
24
+ std::vector<torch::Tensor> adj_backward_cpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
25
+
26
+ torch::Tensor adjT_forward_cpu(int, torch::Tensor, torch::Tensor);
27
+ std::vector<torch::Tensor> adjT_backward_cpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
28
+
29
+ torch::Tensor act_forward_cpu(int, torch::Tensor, torch::Tensor);
30
+ std::vector<torch::Tensor> act_backward_cpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
31
+
32
+ torch::Tensor act4_forward_cpu(int, torch::Tensor, torch::Tensor);
33
+ std::vector<torch::Tensor> act4_backward_cpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
34
+
35
+
36
+ // conversion operations
37
+ // std::vector<torch::Tensor> to_vec_backward_cpu(int, torch::Tensor, torch::Tensor);
38
+ // std::vector<torch::Tensor> from_vec_backward_cpu(int, torch::Tensor, torch::Tensor);
39
+
40
+ // utility operations
41
+ torch::Tensor orthogonal_projector_cpu(int, torch::Tensor);
42
+
43
+ torch::Tensor as_matrix_forward_cpu(int, torch::Tensor);
44
+
45
+ torch::Tensor jleft_forward_cpu(int, torch::Tensor, torch::Tensor);
46
+
47
+
48
+ #endif
49
+
50
+
51
+
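These declarations are the CPU half of lietorch_backends; the GroupOp classes in group_ops.py call the bound ops as forward_op(group_id, *inputs). A rough sketch, assuming the extension is importable and routes CPU tensors to these functions:

import torch
import lietorch_backends

phi = torch.randn(16, 3)                 # SO3 tangent vectors on the CPU
q = lietorch_backends.expm(1, phi)       # group_id 1 = SO3; backed by exp_forward_cpu
phi_back = lietorch_backends.logm(1, q)  # backed by log_forward_cpu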
third-party/DPVO/dpvo/lietorch/include/lietorch_gpu.h ADDED
@@ -0,0 +1,51 @@
1
+
2
+ #ifndef LIETORCH_GPU_H_
3
+ #define LIETORCH_GPU_H_
4
+
5
+ #include <vector>
6
+ #include <torch/extension.h>
7
+ #include <cuda.h>
8
+ #include <cuda_runtime.h>
9
+
10
+
11
+ // unary operations
12
+ torch::Tensor exp_forward_gpu(int, torch::Tensor);
13
+ std::vector<torch::Tensor> exp_backward_gpu(int, torch::Tensor, torch::Tensor);
14
+
15
+ torch::Tensor log_forward_gpu(int, torch::Tensor);
16
+ std::vector<torch::Tensor> log_backward_gpu(int, torch::Tensor, torch::Tensor);
17
+
18
+ torch::Tensor inv_forward_gpu(int, torch::Tensor);
19
+ std::vector<torch::Tensor> inv_backward_gpu(int, torch::Tensor, torch::Tensor);
20
+
21
+ // binary operations
22
+ torch::Tensor mul_forward_gpu(int, torch::Tensor, torch::Tensor);
23
+ std::vector<torch::Tensor> mul_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
24
+
25
+ torch::Tensor adj_forward_gpu(int, torch::Tensor, torch::Tensor);
26
+ std::vector<torch::Tensor> adj_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
27
+
28
+ torch::Tensor adjT_forward_gpu(int, torch::Tensor, torch::Tensor);
29
+ std::vector<torch::Tensor> adjT_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
30
+
31
+ torch::Tensor act_forward_gpu(int, torch::Tensor, torch::Tensor);
32
+ std::vector<torch::Tensor> act_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
33
+
34
+ torch::Tensor act4_forward_gpu(int, torch::Tensor, torch::Tensor);
35
+ std::vector<torch::Tensor> act4_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
36
+
37
+ // conversion operations
38
+ // std::vector<torch::Tensor> to_vec_backward_gpu(int, torch::Tensor, torch::Tensor);
39
+ // std::vector<torch::Tensor> from_vec_backward_gpu(int, torch::Tensor, torch::Tensor);
40
+
41
+ // utility operators
42
+ torch::Tensor orthogonal_projector_gpu(int, torch::Tensor);
43
+
44
+ torch::Tensor as_matrix_forward_gpu(int, torch::Tensor);
45
+
46
+ torch::Tensor jleft_forward_gpu(int, torch::Tensor, torch::Tensor);
47
+
48
+ #endif
49
+
50
+
51
+
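The GPU declarations mirror the CPU header one-for-one; with a CUDA build, moving the operands onto the GPU is expected to route the same calls to these kernels (again an assumption about device-based dispatch in the bindings):

import torch
import lietorch_backends

phi = torch.randn(16, 3, device="cuda")  # SO3 tangent vectors on the GPU
q = lietorch_backends.expm(1, phi)       # backed by exp_forward_gpu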
third-party/DPVO/dpvo/lietorch/include/rxso3.h ADDED
@@ -0,0 +1,324 @@
1
+
2
+ #ifndef RxSO3_HEADER
3
+ #define RxSO3_HEADER
4
+
5
+ #include <stdio.h>
6
+ #include <Eigen/Dense>
7
+ #include <Eigen/Geometry>
8
+
9
+ #include "common.h"
10
+
11
+ template <typename Scalar>
12
+ class RxSO3 {
13
+ public:
14
+ const static int constexpr K = 4; // manifold dimension
15
+ const static int constexpr N = 5; // embedding dimension
16
+
17
+ using Vector3 = Eigen::Matrix<Scalar,3,1>;
18
+ using Vector4 = Eigen::Matrix<Scalar,4,1>;
19
+ using Matrix3 = Eigen::Matrix<Scalar,3,3>;
20
+
21
+ using Tangent = Eigen::Matrix<Scalar,K,1>;
22
+ using Data = Eigen::Matrix<Scalar,N,1>;
23
+
24
+ using Point = Eigen::Matrix<Scalar,3,1>;
25
+ using Point4 = Eigen::Matrix<Scalar,4,1>;
26
+
27
+ using Quaternion = Eigen::Quaternion<Scalar>;
28
+ using Transformation = Eigen::Matrix<Scalar,3,3>;
29
+ using Adjoint = Eigen::Matrix<Scalar,4,4>;
30
+
31
+
32
+ EIGEN_DEVICE_FUNC RxSO3(Quaternion const& q, Scalar const s)
33
+ : unit_quaternion(q), scale(s) {
34
+ unit_quaternion.normalize();
35
+ };
36
+
37
+ EIGEN_DEVICE_FUNC RxSO3(const Scalar *data) : unit_quaternion(data), scale(data[4]) {
38
+ unit_quaternion.normalize();
39
+ };
40
+
41
+ EIGEN_DEVICE_FUNC RxSO3() {
42
+ unit_quaternion = Quaternion::Identity();
43
+ scale = Scalar(1.0);
44
+ }
45
+
46
+ EIGEN_DEVICE_FUNC RxSO3<Scalar> inv() {
47
+ return RxSO3<Scalar>(unit_quaternion.conjugate(), 1.0/scale);
48
+ }
49
+
50
+ EIGEN_DEVICE_FUNC Data data() const {
51
+ Data data_vec; data_vec << unit_quaternion.coeffs(), scale;
52
+ return data_vec;
53
+ }
54
+
55
+ EIGEN_DEVICE_FUNC RxSO3<Scalar> operator*(RxSO3<Scalar> const& other) {
56
+ return RxSO3<Scalar>(unit_quaternion * other.unit_quaternion, scale * other.scale);
57
+ }
58
+
59
+ EIGEN_DEVICE_FUNC Point operator*(Point const& p) const {
60
+ const Quaternion& q = unit_quaternion;
61
+ Point uv = q.vec().cross(p); uv += uv;
62
+ return scale * (p + q.w()*uv + q.vec().cross(uv));
63
+ }
64
+
65
+ EIGEN_DEVICE_FUNC Point4 act4(Point4 const& p) const {
66
+ Point4 p1; p1 << this->operator*(p.template segment<3>(0)), p(3);
67
+ return p1;
68
+ }
69
+
70
+ EIGEN_DEVICE_FUNC Adjoint Adj() const {
71
+ Adjoint Ad = Adjoint::Identity();
72
+ Ad.template block<3,3>(0,0) = unit_quaternion.toRotationMatrix();
73
+ return Ad;
74
+ }
75
+
76
+ EIGEN_DEVICE_FUNC Transformation Matrix() const {
77
+ return scale * unit_quaternion.toRotationMatrix();
78
+ }
79
+
80
+ EIGEN_DEVICE_FUNC Eigen::Matrix<Scalar,4,4> Matrix4x4() const {
81
+ Eigen::Matrix<Scalar,4,4> T;
82
+ T = Eigen::Matrix<Scalar,4,4>::Identity();
83
+ T.template block<3,3>(0,0) = Matrix();
84
+ return T;
85
+ }
86
+
87
+ EIGEN_DEVICE_FUNC Eigen::Matrix<Scalar,5,5> orthogonal_projector() const {
88
+ // jacobian action on a point
89
+ Eigen::Matrix<Scalar,5,5> J = Eigen::Matrix<Scalar,5,5>::Zero();
90
+
91
+ J.template block<3,3>(0,0) = 0.5 * (
92
+ unit_quaternion.w() * Matrix3::Identity() +
93
+ SO3<Scalar>::hat(-unit_quaternion.vec())
94
+ );
95
+
96
+ J.template block<1,3>(3,0) = 0.5 * (-unit_quaternion.vec());
97
+
98
+ // scale
99
+ J(4,3) = scale;
100
+
101
+ return J;
102
+ }
103
+
104
+ EIGEN_DEVICE_FUNC Transformation Rotation() const {
105
+ return unit_quaternion.toRotationMatrix();
106
+ }
107
+
108
+ EIGEN_DEVICE_FUNC Tangent Adj(Tangent const& a) const {
109
+ return Adj() * a;
110
+ }
111
+
112
+ EIGEN_DEVICE_FUNC Tangent AdjT(Tangent const& a) const {
113
+ return Adj().transpose() * a;
114
+ }
115
+
116
+ EIGEN_DEVICE_FUNC static Transformation hat(Tangent const& phi_sigma) {
117
+ Vector3 const phi = phi_sigma.template segment<3>(0);
118
+ return SO3<Scalar>::hat(phi) + phi(3) * Transformation::Identity();
119
+ }
120
+
121
+ EIGEN_DEVICE_FUNC static Adjoint adj(Tangent const& phi_sigma) {
122
+ Vector3 const phi = phi_sigma.template segment<3>(0);
123
+ Matrix3 const Phi = SO3<Scalar>::hat(phi);
124
+
125
+ Adjoint ad = Adjoint::Zero();
126
+ ad.template block<3,3>(0,0) = Phi;
127
+
128
+ return ad;
129
+ }
130
+
131
+ EIGEN_DEVICE_FUNC Tangent Log() const {
132
+ using std::abs;
133
+ using std::atan;
134
+ using std::sqrt;
135
+
136
+ Scalar squared_n = unit_quaternion.vec().squaredNorm();
137
+ Scalar w = unit_quaternion.w();
138
+ Scalar two_atan_nbyw_by_n;
139
+
140
+ /// Atan-based log thanks to
141
+ ///
142
+ /// C. Hertzberg et al.:
143
+ /// "Integrating Generic Sensor Fusion Algorithms with Sound State
144
+ /// Representation through Encapsulation of Manifolds"
145
+ /// Information Fusion, 2011
146
+
147
+ if (squared_n < EPS * EPS) {
148
+ two_atan_nbyw_by_n = Scalar(2) / w - Scalar(2.0/3.0) * (squared_n) / (w * w * w);
149
+ } else {
150
+ Scalar n = sqrt(squared_n);
151
+ if (abs(w) < EPS) {
152
+ if (w > Scalar(0)) {
153
+ two_atan_nbyw_by_n = PI / n;
154
+ } else {
155
+ two_atan_nbyw_by_n = -PI / n;
156
+ }
157
+ } else {
158
+ two_atan_nbyw_by_n = Scalar(2) * atan(n / w) / n;
159
+ }
160
+ }
161
+
162
+ Tangent phi_sigma;
163
+ phi_sigma << two_atan_nbyw_by_n * unit_quaternion.vec(), log(scale);
164
+
165
+ return phi_sigma;
166
+ }
167
+
168
+ EIGEN_DEVICE_FUNC static RxSO3<Scalar> Exp(Tangent const& phi_sigma) {
169
+ Vector3 phi = phi_sigma.template segment<3>(0);
170
+ Scalar scale = exp(phi_sigma(3));
171
+
172
+ Scalar theta2 = phi.squaredNorm();
173
+ Scalar theta = sqrt(theta2);
174
+ Scalar imag_factor;
175
+ Scalar real_factor;
176
+
177
+ if (theta < EPS) {
178
+ Scalar theta4 = theta2 * theta2;
179
+ imag_factor = Scalar(0.5) - Scalar(1.0/48.0) * theta2 + Scalar(1.0/3840.0) * theta4;
180
+ real_factor = Scalar(1) - Scalar(1.0/8.0) * theta2 + Scalar(1.0/384.0) * theta4;
181
+ } else {
182
+ imag_factor = sin(.5 * theta) / theta;
183
+ real_factor = cos(.5 * theta);
184
+ }
185
+
186
+ Quaternion q(real_factor, imag_factor*phi.x(), imag_factor*phi.y(), imag_factor*phi.z());
187
+ return RxSO3<Scalar>(q, scale);
188
+ }
189
+
190
+ EIGEN_DEVICE_FUNC static Matrix3 calcW(Tangent const& phi_sigma) {
191
+ // left jacobian
192
+ using std::abs;
193
+ Matrix3 const I = Matrix3::Identity();
194
+ Scalar const one(1);
195
+ Scalar const half(0.5);
196
+
197
+ Vector3 const phi = phi_sigma.template segment<3>(0);
198
+ Scalar const sigma = phi_sigma(3);
199
+ Scalar const theta = phi.norm();
200
+
201
+ Matrix3 const Phi = SO3<Scalar>::hat(phi);
202
+ Matrix3 const Phi2 = Phi * Phi;
203
+ Scalar const scale = exp(sigma);
204
+
205
+ Scalar A, B, C;
206
+ if (abs(sigma) < EPS) {
207
+ C = one;
208
+ if (abs(theta) < EPS) {
209
+ A = half;
210
+ B = Scalar(1. / 6.);
211
+ } else {
212
+ Scalar theta_sq = theta * theta;
213
+ A = (one - cos(theta)) / theta_sq;
214
+ B = (theta - sin(theta)) / (theta_sq * theta);
215
+ }
216
+ } else {
217
+ C = (scale - one) / sigma;
218
+ if (abs(theta) < EPS) {
219
+ Scalar sigma_sq = sigma * sigma;
220
+ A = ((sigma - one) * scale + one) / sigma_sq;
221
+ B = (scale * half * sigma_sq + scale - one - sigma * scale) /
222
+ (sigma_sq * sigma);
223
+ } else {
224
+ Scalar theta_sq = theta * theta;
225
+ Scalar a = scale * sin(theta);
226
+ Scalar b = scale * cos(theta);
227
+ Scalar c = theta_sq + sigma * sigma;
228
+ A = (a * sigma + (one - b) * theta) / (theta * c);
229
+ B = (C - ((b - one) * sigma + a * theta) / (c)) * one / (theta_sq);
230
+ }
231
+ }
232
+ return A * Phi + B * Phi2 + C * I;
233
+ }
234
+
235
+ EIGEN_DEVICE_FUNC static Matrix3 calcWInv(Tangent const& phi_sigma) {
236
+ // left jacobian inverse
237
+ Matrix3 const I = Matrix3::Identity();
238
+ Scalar const half(0.5);
239
+ Scalar const one(1);
240
+ Scalar const two(2);
241
+
242
+ Vector3 const phi = phi_sigma.template segment<3>(0);
243
+ Scalar const sigma = phi_sigma(3);
244
+ Scalar const theta = phi.norm();
245
+ Scalar const scale = exp(sigma);
246
+
247
+ Matrix3 const Phi = SO3<Scalar>::hat(phi);
248
+ Matrix3 const Phi2 = Phi * Phi;
249
+ Scalar const scale_sq = scale * scale;
250
+ Scalar const theta_sq = theta * theta;
251
+ Scalar const sin_theta = sin(theta);
252
+ Scalar const cos_theta = cos(theta);
253
+
254
+ Scalar a, b, c;
255
+ if (abs(sigma * sigma) < EPS) {
256
+ c = one - half * sigma;
257
+ a = -half;
258
+ if (abs(theta_sq) < EPS) {
259
+ b = Scalar(1. / 12.);
260
+ } else {
261
+ b = (theta * sin_theta + two * cos_theta - two) /
262
+ (two * theta_sq * (cos_theta - one));
263
+ }
264
+ } else {
265
+ Scalar const scale_cu = scale_sq * scale;
266
+ c = sigma / (scale - one);
267
+ if (abs(theta_sq) < EPS) {
268
+ a = (-sigma * scale + scale - one) / ((scale - one) * (scale - one));
269
+ b = (scale_sq * sigma - two * scale_sq + scale * sigma + two * scale) /
270
+ (two * scale_cu - Scalar(6) * scale_sq + Scalar(6) * scale - two);
271
+ } else {
272
+ Scalar const s_sin_theta = scale * sin_theta;
273
+ Scalar const s_cos_theta = scale * cos_theta;
274
+ a = (theta * s_cos_theta - theta - sigma * s_sin_theta) /
275
+ (theta * (scale_sq - two * s_cos_theta + one));
276
+ b = -scale *
277
+ (theta * s_sin_theta - theta * sin_theta + sigma * s_cos_theta -
278
+ scale * sigma + sigma * cos_theta - sigma) /
279
+ (theta_sq * (scale_cu - two * scale * s_cos_theta - scale_sq +
280
+ two * s_cos_theta + scale - one));
281
+ }
282
+ }
283
+ return a * Phi + b * Phi2 + c * I;
284
+ }
285
+
286
+ EIGEN_DEVICE_FUNC static Adjoint left_jacobian(Tangent const& phi_sigma) {
287
+ // left jacobian
288
+ Adjoint J = Adjoint::Identity();
289
+ Vector3 phi = phi_sigma.template segment<3>(0);
290
+ J.template block<3,3>(0,0) = SO3<Scalar>::left_jacobian(phi);
291
+ return J;
292
+ }
293
+
294
+ EIGEN_DEVICE_FUNC static Adjoint left_jacobian_inverse(Tangent const& phi_sigma) {
295
+ // left jacobian inverse
296
+ Adjoint Jinv = Adjoint::Identity();
297
+ Vector3 phi = phi_sigma.template segment<3>(0);
298
+ Jinv.template block<3,3>(0,0) = SO3<Scalar>::left_jacobian_inverse(phi);
299
+ return Jinv;
300
+ }
301
+
302
+ EIGEN_DEVICE_FUNC static Eigen::Matrix<Scalar,3,4> act_jacobian(Point const& p) {
303
+ // jacobian action on a point
304
+ Eigen::Matrix<Scalar,3,4> Ja;
305
+ Ja << SO3<Scalar>::hat(-p), p;
306
+ return Ja;
307
+ }
308
+
309
+ EIGEN_DEVICE_FUNC static Eigen::Matrix<Scalar,4,4> act4_jacobian(Point4 const& p) {
310
+ // jacobian action on a point
311
+ Eigen::Matrix<Scalar,4,4> J = Eigen::Matrix<Scalar,4,4>::Zero();
312
+ J.template block<3,3>(0,0) = SO3<Scalar>::hat(-p.template segment<3>(0));
313
+ J.template block<3,1>(0,3) = p.template segment<3>(0);
314
+ return J;
315
+ }
316
+
317
+ private:
318
+ Quaternion unit_quaternion;
319
+ Scalar scale;
320
+ };
321
+
322
+ #endif
323
+
324
+
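The Exp/Log pair above can be exercised through the RxSO3 wrapper from groups.py. A small round-trip sketch (double precision, small tangent vectors so the log stays on the principal branch; assumes either the CPU or CUDA backend is built):

import torch
from dpvo.lietorch.groups import RxSO3

phi_sigma = 0.1 * torch.randn(8, 4, dtype=torch.float64)   # [so3 rotation (3), log-scale]
X = RxSO3.exp(phi_sigma)                                    # 5-vector: quaternion + scale
assert torch.allclose(X.log(), phi_sigma, atol=1e-6)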
third-party/DPVO/dpvo/lietorch/include/se3.h ADDED
@@ -0,0 +1,229 @@
1
+
2
+ #ifndef SE3_HEADER
3
+ #define SE3_HEADER
4
+
5
+ #include <stdio.h>
6
+ #include <Eigen/Dense>
7
+ #include <Eigen/Geometry>
8
+
9
+ #include "common.h"
10
+ #include "so3.h"
11
+
12
+
13
+ template <typename Scalar>
14
+ class SE3 {
15
+ public:
16
+ const static int constexpr K = 6; // manifold dimension
17
+ const static int constexpr N = 7; // embedding dimension
18
+
19
+ using Vector3 = Eigen::Matrix<Scalar,3,1>;
20
+ using Vector4 = Eigen::Matrix<Scalar,4,1>;
21
+ using Matrix3 = Eigen::Matrix<Scalar,3,3>;
22
+
23
+ using Tangent = Eigen::Matrix<Scalar,K,1>;
24
+ using Point = Eigen::Matrix<Scalar,3,1>;
25
+ using Point4 = Eigen::Matrix<Scalar,4,1>;
26
+ using Data = Eigen::Matrix<Scalar,N,1>;
27
+ using Transformation = Eigen::Matrix<Scalar,4,4>;
28
+ using Adjoint = Eigen::Matrix<Scalar,K,K>;
29
+
30
+ EIGEN_DEVICE_FUNC SE3() { translation = Vector3::Zero(); }
31
+
32
+ EIGEN_DEVICE_FUNC SE3(SO3<Scalar> const& so3, Vector3 const& t) : so3(so3), translation(t) {};
33
+
34
+ EIGEN_DEVICE_FUNC SE3(const Scalar *data) : translation(data), so3(data+3) {};
35
+
36
+ EIGEN_DEVICE_FUNC SE3<Scalar> inv() {
37
+ return SE3(so3.inv(), -(so3.inv()*translation));
38
+ }
39
+
40
+ EIGEN_DEVICE_FUNC Data data() const {
41
+ Data data_vec; data_vec << translation, so3.data();
42
+ return data_vec;
43
+ }
44
+
45
+ EIGEN_DEVICE_FUNC SE3<Scalar> operator*(SE3<Scalar> const& other) {
46
+ return SE3(so3 * other.so3, translation + so3 * other.translation);
47
+ }
48
+
49
+ EIGEN_DEVICE_FUNC Point operator*(Point const& p) const {
50
+ return so3 * p + translation;
51
+ }
52
+
53
+ EIGEN_DEVICE_FUNC Point4 act4(Point4 const& p) const {
54
+ Point4 p1; p1 << so3 * p.template segment<3>(0) + translation * p(3), p(3);
55
+ return p1;
56
+ }
57
+
58
+ EIGEN_DEVICE_FUNC Adjoint Adj() const {
59
+ Matrix3 R = so3.Matrix();
60
+ Matrix3 tx = SO3<Scalar>::hat(translation);
61
+ Matrix3 Zer = Matrix3::Zero();
62
+
63
+ Adjoint Ad;
64
+ Ad << R, tx*R, Zer, R;
65
+
66
+ return Ad;
67
+ }
68
+
69
+ EIGEN_DEVICE_FUNC Transformation Matrix() const {
70
+ Transformation T = Transformation::Identity();
71
+ T.template block<3,3>(0,0) = so3.Matrix();
72
+ T.template block<3,1>(0,3) = translation;
73
+ return T;
74
+ }
75
+
76
+ EIGEN_DEVICE_FUNC Transformation Matrix4x4() const {
77
+ return Matrix();
78
+ }
79
+
80
+ EIGEN_DEVICE_FUNC Tangent Adj(Tangent const& a) const {
81
+ return Adj() * a;
82
+ }
83
+
84
+ EIGEN_DEVICE_FUNC Tangent AdjT(Tangent const& a) const {
85
+ return Adj().transpose() * a;
86
+ }
87
+
88
+
89
+ EIGEN_DEVICE_FUNC static Transformation hat(Tangent const& tau_phi) {
90
+ Vector3 tau = tau_phi.template segment<3>(0);
91
+ Vector3 phi = tau_phi.template segment<3>(3);
92
+
93
+ Transformation TauPhi = Transformation::Zero();
94
+ TauPhi.template block<3,3>(0,0) = SO3<Scalar>::hat(phi);
95
+ TauPhi.template block<3,1>(0,3) = tau;
96
+
97
+ return TauPhi;
98
+ }
99
+
100
+ EIGEN_DEVICE_FUNC static Adjoint adj(Tangent const& tau_phi) {
101
+ Vector3 tau = tau_phi.template segment<3>(0);
102
+ Vector3 phi = tau_phi.template segment<3>(3);
103
+
104
+ Matrix3 Tau = SO3<Scalar>::hat(tau);
105
+ Matrix3 Phi = SO3<Scalar>::hat(phi);
106
+ Matrix3 Zer = Matrix3::Zero();
107
+
108
+ Adjoint ad;
109
+ ad << Phi, Tau, Zer, Phi;
110
+
111
+ return ad;
112
+ }
113
+
114
+ EIGEN_DEVICE_FUNC Eigen::Matrix<Scalar,7,7> orthogonal_projector() const {
115
+ // jacobian action on a point
116
+ Eigen::Matrix<Scalar,7,7> J = Eigen::Matrix<Scalar,7,7>::Zero();
117
+ J.template block<3,3>(0,0) = Matrix3::Identity();
118
+ J.template block<3,3>(0,3) = SO3<Scalar>::hat(-translation);
119
+ J.template block<4,4>(3,3) = so3.orthogonal_projector();
120
+
121
+ return J;
122
+ }
123
+
124
+ EIGEN_DEVICE_FUNC Tangent Log() const {
125
+ Vector3 phi = so3.Log();
126
+ Matrix3 Vinv = SO3<Scalar>::left_jacobian_inverse(phi);
127
+
128
+ Tangent tau_phi;
129
+ tau_phi << Vinv * translation, phi;
130
+
131
+ return tau_phi;
132
+ }
133
+
134
+ EIGEN_DEVICE_FUNC static SE3<Scalar> Exp(Tangent const& tau_phi) {
135
+ Vector3 tau = tau_phi.template segment<3>(0);
136
+ Vector3 phi = tau_phi.template segment<3>(3);
137
+
138
+ SO3<Scalar> so3 = SO3<Scalar>::Exp(phi);
139
+ Vector3 t = SO3<Scalar>::left_jacobian(phi) * tau;
140
+
141
+ return SE3<Scalar>(so3, t);
142
+ }
143
+
144
+ EIGEN_DEVICE_FUNC static Matrix3 calcQ(Tangent const& tau_phi) {
145
+ // Q matrix
146
+ Vector3 tau = tau_phi.template segment<3>(0);
147
+ Vector3 phi = tau_phi.template segment<3>(3);
148
+ Matrix3 Tau = SO3<Scalar>::hat(tau);
149
+ Matrix3 Phi = SO3<Scalar>::hat(phi);
150
+
151
+ Scalar theta = phi.norm();
152
+ Scalar theta_pow2 = theta * theta;
153
+ Scalar theta_pow4 = theta_pow2 * theta_pow2;
154
+
155
+ Scalar coef1 = (theta < EPS) ?
156
+ Scalar(1.0/6.0) - Scalar(1.0/120.0) * theta_pow2 :
157
+ (theta - sin(theta)) / (theta_pow2 * theta);
158
+
159
+ Scalar coef2 = (theta < EPS) ?
160
+ Scalar(1.0/24.0) - Scalar(1.0/720.0) * theta_pow2 :
161
+ (theta_pow2 + 2*cos(theta) - 2) / (2 * theta_pow4);
162
+
163
+ Scalar coef3 = (theta < EPS) ?
164
+ Scalar(1.0/120.0) - Scalar(1.0/2520.0) * theta_pow2 :
165
+ (2*theta - 3*sin(theta) + theta*cos(theta)) / (2 * theta_pow4 * theta);
166
+
167
+ Matrix3 Q = Scalar(0.5) * Tau +
168
+ coef1 * (Phi*Tau + Tau*Phi + Phi*Tau*Phi) +
169
+ coef2 * (Phi*Phi*Tau + Tau*Phi*Phi - 3*Phi*Tau*Phi) +
170
+ coef3 * (Phi*Tau*Phi*Phi + Phi*Phi*Tau*Phi);
171
+
172
+ return Q;
173
+ }
174
+
175
+ EIGEN_DEVICE_FUNC static Adjoint left_jacobian(Tangent const& tau_phi) {
176
+ // left jacobian
177
+ Vector3 phi = tau_phi.template segment<3>(3);
178
+ Matrix3 J = SO3<Scalar>::left_jacobian(phi);
179
+ Matrix3 Q = SE3<Scalar>::calcQ(tau_phi);
180
+ Matrix3 Zer = Matrix3::Zero();
181
+
182
+ Adjoint J6x6;
183
+ J6x6 << J, Q, Zer, J;
184
+
185
+ return J6x6;
186
+ }
187
+
188
+ EIGEN_DEVICE_FUNC static Adjoint left_jacobian_inverse(Tangent const& tau_phi) {
189
+ // left jacobian inverse
190
+ Vector3 tau = tau_phi.template segment<3>(0);
191
+ Vector3 phi = tau_phi.template segment<3>(3);
192
+ Matrix3 Jinv = SO3<Scalar>::left_jacobian_inverse(phi);
193
+ Matrix3 Q = SE3<Scalar>::calcQ(tau_phi);
194
+ Matrix3 Zer = Matrix3::Zero();
195
+
196
+ Adjoint J6x6;
197
+ J6x6 << Jinv, -Jinv * Q * Jinv, Zer, Jinv;
198
+
199
+ return J6x6;
200
+
201
+ }
202
+
203
+ EIGEN_DEVICE_FUNC static Eigen::Matrix<Scalar,3,6> act_jacobian(Point const& p) {
204
+ // jacobian action on a point
205
+ Eigen::Matrix<Scalar,3,6> J;
206
+ J.template block<3,3>(0,0) = Matrix3::Identity();
207
+ J.template block<3,3>(0,3) = SO3<Scalar>::hat(-p);
208
+ return J;
209
+ }
210
+
211
+ EIGEN_DEVICE_FUNC static Eigen::Matrix<Scalar,4,6> act4_jacobian(Point4 const& p) {
212
+ // jacobian action on a point
213
+ Eigen::Matrix<Scalar,4,6> J = Eigen::Matrix<Scalar,4,6>::Zero();
214
+ J.template block<3,3>(0,0) = p(3) * Matrix3::Identity();
215
+ J.template block<3,3>(0,3) = SO3<Scalar>::hat(-p.template segment<3>(0));
216
+ return J;
217
+ }
218
+
219
+
220
+
221
+
222
+ private:
223
+ SO3<Scalar> so3;
224
+ Vector3 translation;
225
+
226
+ };
227
+
228
+ #endif
229
+
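A consistency sketch relating the SE3 matrix form above to the group action, driven through the Python SE3 class from groups.py (double precision; tolerances and shapes are illustrative):

import torch
from dpvo.lietorch.groups import SE3

T = SE3.Random(4, dtype=torch.float64)
p = torch.randn(4, 3, dtype=torch.float64)
ph = torch.cat([p, torch.ones_like(p[..., :1])], dim=-1)     # homogeneous points
lhs = T * p                                                  # action on 3D points (Act3)
rhs = torch.matmul(T.matrix(), ph.unsqueeze(-1)).squeeze(-1)[..., :3]
assert torch.allclose(lhs, rhs, atol=1e-6)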
third-party/DPVO/dpvo/lietorch/include/sim3.h ADDED
@@ -0,0 +1,217 @@
1
+
2
+ #ifndef Sim3_HEADER
3
+ #define Sim3_HEADER
4
+
5
+ #include <stdio.h>
6
+ #include <iostream>
7
+
8
+ #include <Eigen/Dense>
9
+ #include <Eigen/Geometry>
10
+
11
+ #include "common.h"
12
+ #include "so3.h"
13
+ #include "rxso3.h"
14
+
15
+
16
+ template <typename Scalar>
17
+ class Sim3 {
18
+ public:
19
+ const static int constexpr K = 7; // manifold dimension
20
+ const static int constexpr N = 8; // embedding dimension
21
+
22
+ using Vector3 = Eigen::Matrix<Scalar,3,1>;
23
+ using Vector4 = Eigen::Matrix<Scalar,4,1>;
24
+ using Matrix3 = Eigen::Matrix<Scalar,3,3>;
25
+
26
+ using Tangent = Eigen::Matrix<Scalar,K,1>;
27
+ using Point = Eigen::Matrix<Scalar,3,1>;
28
+ using Point4 = Eigen::Matrix<Scalar,4,1>;
29
+ using Data = Eigen::Matrix<Scalar,N,1>;
30
+ using Transformation = Eigen::Matrix<Scalar,4,4>;
31
+ using Adjoint = Eigen::Matrix<Scalar,K,K>;
32
+
33
+ EIGEN_DEVICE_FUNC Sim3() {
34
+ translation = Vector3::Zero();
35
+ }
36
+
37
+ EIGEN_DEVICE_FUNC Sim3(RxSO3<Scalar> const& rxso3, Vector3 const& t)
38
+ : rxso3(rxso3), translation(t) {};
39
+
40
+ EIGEN_DEVICE_FUNC Sim3(const Scalar *data)
41
+ : translation(data), rxso3(data+3) {};
42
+
43
+ EIGEN_DEVICE_FUNC Sim3<Scalar> inv() {
44
+ return Sim3<Scalar>(rxso3.inv(), -(rxso3.inv() * translation));
45
+ }
46
+
47
+ EIGEN_DEVICE_FUNC Data data() const {
48
+ Data data_vec; data_vec << translation, rxso3.data();
49
+ return data_vec;
50
+ }
51
+
52
+ EIGEN_DEVICE_FUNC Sim3<Scalar> operator*(Sim3<Scalar> const& other) {
53
+ return Sim3(rxso3 * other.rxso3, translation + rxso3 * other.translation);
54
+ }
55
+
56
+ EIGEN_DEVICE_FUNC Point operator*(Point const& p) const {
57
+ return (rxso3 * p) + translation;
58
+ }
59
+
60
+ EIGEN_DEVICE_FUNC Point4 act4(Point4 const& p) const {
61
+ Point4 p1; p1 << rxso3 * p.template segment<3>(0) + p(3) * translation , p(3);
62
+ return p1;
63
+ }
64
+
65
+ EIGEN_DEVICE_FUNC Transformation Matrix() const {
66
+ Transformation T = Transformation::Identity();
67
+ T.template block<3,3>(0,0) = rxso3.Matrix();
68
+ T.template block<3,1>(0,3) = translation;
69
+ return T;
70
+ }
71
+
72
+ EIGEN_DEVICE_FUNC Transformation Matrix4x4() const {
73
+ Transformation T = Transformation::Identity();
74
+ T.template block<3,3>(0,0) = rxso3.Matrix();
75
+ T.template block<3,1>(0,3) = translation;
76
+ return T;
77
+ }
78
+
79
+ EIGEN_DEVICE_FUNC Eigen::Matrix<Scalar,8,8> orthogonal_projector() const {
80
+ // jacobian action on a point
81
+ Eigen::Matrix<Scalar,8,8> J = Eigen::Matrix<Scalar,8,8>::Zero();
82
+ J.template block<3,3>(0,0) = Matrix3::Identity();
83
+ J.template block<3,3>(0,3) = SO3<Scalar>::hat(-translation);
84
+ J.template block<3,1>(0,6) = translation;
85
+ J.template block<5,5>(3,3) = rxso3.orthogonal_projector();
86
+ return J;
87
+ }
88
+
89
+ EIGEN_DEVICE_FUNC Adjoint Adj() const {
90
+ Adjoint Ad = Adjoint::Identity();
91
+ Matrix3 sR = rxso3.Matrix();
92
+ Matrix3 tx = SO3<Scalar>::hat(translation);
93
+ Matrix3 R = rxso3.Rotation();
94
+
95
+ Ad.template block<3,3>(0,0) = sR;
96
+ Ad.template block<3,3>(0,3) = tx * R;
97
+ Ad.template block<3,1>(0,6) = -translation;
98
+ Ad.template block<3,3>(3,3) = R;
99
+
100
+ return Ad;
101
+ }
102
+
103
+ EIGEN_DEVICE_FUNC Tangent Adj(Tangent const& a) const {
104
+ return Adj() * a;
105
+ }
106
+
107
+ EIGEN_DEVICE_FUNC Tangent AdjT(Tangent const& a) const {
108
+ return Adj().transpose() * a;
109
+ }
110
+
111
+ EIGEN_DEVICE_FUNC static Transformation hat(Tangent const& tau_phi_sigma) {
112
+ Vector3 tau = tau_phi_sigma.template segment<3>(0);
113
+ Vector3 phi = tau_phi_sigma.template segment<3>(3);
114
+ Scalar sigma = tau_phi_sigma(6);
115
+
116
+ Matrix3 Phi = SO3<Scalar>::hat(phi);
117
+ Matrix3 I = Matrix3::Identity();
118
+
119
+ Transformation Omega = Transformation::Zero();
120
+ Omega.template block<3,3>(0,0) = Phi + sigma * I;
121
+ Omega.template block<3,1>(0,3) = tau;
122
+
123
+ return Omega;
124
+ }
125
+
126
+ EIGEN_DEVICE_FUNC static Adjoint adj(Tangent const& tau_phi_sigma) {
127
+ Adjoint ad = Adjoint::Zero();
128
+ Vector3 tau = tau_phi_sigma.template segment<3>(0);
129
+ Vector3 phi = tau_phi_sigma.template segment<3>(3);
130
+ Scalar sigma = tau_phi_sigma(6);
131
+
132
+ Matrix3 Tau = SO3<Scalar>::hat(tau);
133
+ Matrix3 Phi = SO3<Scalar>::hat(phi);
134
+ Matrix3 I = Matrix3::Identity();
135
+
136
+ ad.template block<3,3>(0,0) = Phi + sigma * I;
137
+ ad.template block<3,3>(0,3) = Tau;
138
+ ad.template block<3,1>(0,6) = -tau;
139
+ ad.template block<3,3>(3,3) = Phi;
140
+
141
+ return ad;
142
+ }
143
+
144
+
145
+ EIGEN_DEVICE_FUNC Tangent Log() const {
146
+ // logarithm map
147
+ Vector4 phi_sigma = rxso3.Log();
148
+ Matrix3 W = RxSO3<Scalar>::calcW(phi_sigma);
149
+
150
+ Tangent tau_phi_sigma;
151
+ tau_phi_sigma << W.inverse() * translation, phi_sigma;
152
+
153
+ return tau_phi_sigma;
154
+ }
155
+
156
+ EIGEN_DEVICE_FUNC static Sim3<Scalar> Exp(Tangent const& tau_phi_sigma) {
157
+ // exponential map
158
+ Vector3 tau = tau_phi_sigma.template segment<3>(0);
159
+ Vector4 phi_sigma = tau_phi_sigma.template segment<4>(3);
160
+
161
+ RxSO3<Scalar> rxso3 = RxSO3<Scalar>::Exp(phi_sigma);
162
+ Matrix3 W = RxSO3<Scalar>::calcW(phi_sigma);
163
+
164
+ return Sim3<Scalar>(rxso3, W*tau);
165
+ }
166
+
167
+ EIGEN_DEVICE_FUNC static Adjoint left_jacobian(Tangent const& tau_phi_sigma) {
168
+ // left jacobian
169
+ Adjoint const Xi = adj(tau_phi_sigma);
170
+ Adjoint const Xi2 = Xi * Xi;
171
+ Adjoint const Xi4 = Xi2 * Xi2;
172
+
173
+ return Adjoint::Identity()
174
+ + Scalar(1.0/2.0)*Xi
175
+ + Scalar(1.0/6.0)*Xi2
176
+ + Scalar(1.0/24.0)*Xi*Xi2
177
+ + Scalar(1.0/120.0)*Xi4
178
+ + Scalar(1.0/720.0)*Xi*Xi4;
179
+ }
180
+
181
+ EIGEN_DEVICE_FUNC static Adjoint left_jacobian_inverse(Tangent const& tau_phi_sigma) {
182
+ // left jacobian inverse
183
+ Adjoint const Xi = adj(tau_phi_sigma);
184
+ Adjoint const Xi2 = Xi * Xi;
185
+ Adjoint const Xi4 = Xi2 * Xi2;
186
+
187
+ return Adjoint::Identity()
188
+ - Scalar(1.0/2.0)*Xi
189
+ + Scalar(1.0/12.0)*Xi2
190
+ - Scalar(1.0/720.0)*Xi4;
191
+ }
192
+
193
+ EIGEN_DEVICE_FUNC static Eigen::Matrix<Scalar,3,7> act_jacobian(Point const& p) {
194
+ // jacobian action on a point
195
+ Eigen::Matrix<Scalar,3,7> J;
196
+ J.template block<3,3>(0,0) = Matrix3::Identity();
197
+ J.template block<3,3>(0,3) = SO3<Scalar>::hat(-p);
198
+ J.template block<3,1>(0,6) = p;
199
+ return J;
200
+ }
201
+
202
+ EIGEN_DEVICE_FUNC static Eigen::Matrix<Scalar,4,7> act4_jacobian(Point4 const& p) {
203
+ // jacobian action on a point
204
+ Eigen::Matrix<Scalar,4,7> J = Eigen::Matrix<Scalar,4,7>::Zero();
205
+ J.template block<3,3>(0,0) = p(3) * Matrix3::Identity();
206
+ J.template block<3,3>(0,3) = SO3<Scalar>::hat(-p.template segment<3>(0));
207
+ J.template block<3,1>(0,6) = p.template segment<3>(0);
208
+ return J;
209
+ }
210
+
211
+ private:
212
+ Vector3 translation;
213
+ RxSO3<Scalar> rxso3;
214
+ };
215
+
216
+ #endif
217
+
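Note (editor's sketch, not part of the commit): Sim3 above is the 7-dof similarity transform (rotation, translation, scale) that the Python bindings further down expose. A minimal usage sketch, assuming the lietorch extension in this package has been built; only calls that also appear in run_tests.py below are used (Sim3.exp, .log, .act, .matrix), and the device fallback and tolerances are assumptions.

import torch
from lietorch import Sim3

device = "cuda" if torch.cuda.is_available() else "cpu"

tau = 0.1 * torch.randn(1, 7, device=device).double()   # (t, phi, sigma) tangent vector
X = Sim3.exp(tau)                                        # exponential map (Sim3::Exp above)
assert torch.allclose(X.log(), tau, atol=1e-6)           # Log is the inverse map for small tau

p = torch.randn(1, 3, device=device).double()
q = X.act(p)                                             # scaled rotation plus translation
p_h = torch.cat([p, torch.ones_like(p[..., :1])], dim=-1)
q_mat = (X.matrix() @ p_h.unsqueeze(-1))[..., :3, 0]     # same action via the 4x4 matrix form
assert torch.allclose(q, q_mat, atol=1e-6)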
third-party/DPVO/dpvo/lietorch/include/so3.h ADDED
@@ -0,0 +1,229 @@
1
+
2
+ #ifndef SO3_HEADER
3
+ #define SO3_HEADER
4
+
5
+ #include <cuda.h>
6
+ #include <stdio.h>
7
+ #include <Eigen/Dense>
8
+ #include <Eigen/Geometry>
9
+
10
+ #include "common.h"
11
+
12
+ template <typename Scalar>
13
+ class SO3 {
14
+ public:
15
+ const static int constexpr K = 3; // manifold dimension
16
+ const static int constexpr N = 4; // embedding dimension
17
+
18
+ using Vector3 = Eigen::Matrix<Scalar,3,1>;
19
+ using Vector4 = Eigen::Matrix<Scalar,4,1>;
20
+ using Matrix3 = Eigen::Matrix<Scalar,3,3>;
21
+
22
+ using Tangent = Eigen::Matrix<Scalar,K,1>;
23
+ using Data = Eigen::Matrix<Scalar,N,1>;
24
+
25
+ using Point = Eigen::Matrix<Scalar,3,1>;
26
+ using Point4 = Eigen::Matrix<Scalar,4,1>;
27
+ using Transformation = Eigen::Matrix<Scalar,3,3>;
28
+ using Adjoint = Eigen::Matrix<Scalar,K,K>;
29
+ using Quaternion = Eigen::Quaternion<Scalar>;
30
+
31
+ EIGEN_DEVICE_FUNC SO3(Quaternion const& q) : unit_quaternion(q) {
32
+ unit_quaternion.normalize();
33
+ };
34
+
35
+ EIGEN_DEVICE_FUNC SO3(const Scalar *data) : unit_quaternion(data) {
36
+ unit_quaternion.normalize();
37
+ };
38
+
39
+ EIGEN_DEVICE_FUNC SO3() {
40
+ unit_quaternion = Quaternion::Identity();
41
+ }
42
+
43
+ EIGEN_DEVICE_FUNC SO3<Scalar> inv() {
44
+ return SO3<Scalar>(unit_quaternion.conjugate());
45
+ }
46
+
47
+ EIGEN_DEVICE_FUNC Data data() const {
48
+ return unit_quaternion.coeffs();
49
+ }
50
+
51
+ EIGEN_DEVICE_FUNC SO3<Scalar> operator*(SO3<Scalar> const& other) {
52
+ return SO3(unit_quaternion * other.unit_quaternion);
53
+ }
54
+
55
+ EIGEN_DEVICE_FUNC Point operator*(Point const& p) const {
56
+ const Quaternion& q = unit_quaternion;
57
+ Point uv = q.vec().cross(p);
58
+ uv += uv;
59
+ return p + q.w()*uv + q.vec().cross(uv);
60
+ }
61
+
62
+ EIGEN_DEVICE_FUNC Point4 act4(Point4 const& p) const {
63
+ Point4 p1; p1 << this->operator*(p.template segment<3>(0)), p(3);
64
+ return p1;
65
+ }
66
+
67
+ EIGEN_DEVICE_FUNC Adjoint Adj() const {
68
+ return unit_quaternion.toRotationMatrix();
69
+ }
70
+
71
+ EIGEN_DEVICE_FUNC Transformation Matrix() const {
72
+ return unit_quaternion.toRotationMatrix();
73
+ }
74
+
75
+ EIGEN_DEVICE_FUNC Eigen::Matrix<Scalar,4,4> Matrix4x4() const {
76
+ Eigen::Matrix<Scalar,4,4> T = Eigen::Matrix<Scalar,4,4>::Identity();
77
+ T.template block<3,3>(0,0) = Matrix();
78
+ return T;
79
+ }
80
+
81
+ EIGEN_DEVICE_FUNC Eigen::Matrix<Scalar,4,4> orthogonal_projector() const {
82
+ // jacobian action on a point
83
+ Eigen::Matrix<Scalar,4,4> J = Eigen::Matrix<Scalar,4,4>::Zero();
84
+ J.template block<3,3>(0,0) = 0.5 * (
85
+ unit_quaternion.w() * Matrix3::Identity() +
86
+ SO3<Scalar>::hat(-unit_quaternion.vec())
87
+ );
88
+
89
+ J.template block<1,3>(3,0) = 0.5 * (-unit_quaternion.vec());
90
+ return J;
91
+ }
92
+
93
+ EIGEN_DEVICE_FUNC Tangent Adj(Tangent const& a) const {
94
+ return Adj() * a;
95
+ }
96
+
97
+ EIGEN_DEVICE_FUNC Tangent AdjT(Tangent const& a) const {
98
+ return Adj().transpose() * a;
99
+ }
100
+
101
+ EIGEN_DEVICE_FUNC static Transformation hat(Tangent const& phi) {
102
+ Transformation Phi;
103
+ Phi <<
104
+ 0.0, -phi(2), phi(1),
105
+ phi(2), 0.0, -phi(0),
106
+ -phi(1), phi(0), 0.0;
107
+
108
+ return Phi;
109
+ }
110
+
111
+ EIGEN_DEVICE_FUNC static Adjoint adj(Tangent const& phi) {
112
+ return SO3<Scalar>::hat(phi);
113
+ }
114
+
115
+ EIGEN_DEVICE_FUNC Tangent Log() const {
116
+ using std::abs;
117
+ using std::atan;
118
+ using std::sqrt;
119
+ Scalar squared_n = unit_quaternion.vec().squaredNorm();
120
+ Scalar w = unit_quaternion.w();
121
+
122
+ Scalar two_atan_nbyw_by_n;
123
+
124
+ /// Atan-based log thanks to
125
+ ///
126
+ /// C. Hertzberg et al.:
127
+ /// "Integrating Generic Sensor Fusion Algorithms with Sound State
128
+ /// Representation through Encapsulation of Manifolds"
129
+ /// Information Fusion, 2011
130
+
131
+ if (squared_n < EPS * EPS) {
132
+ // If quaternion is normalized and n=0, then w should be 1;
133
+ // w=0 should never happen here!
134
+ Scalar squared_w = w * w;
135
+ two_atan_nbyw_by_n =
136
+ Scalar(2) / w - Scalar(2.0/3.0) * (squared_n) / (w * squared_w);
137
+ } else {
138
+ Scalar n = sqrt(squared_n);
139
+ if (abs(w) < EPS) {
140
+ if (w > Scalar(0)) {
141
+ two_atan_nbyw_by_n = Scalar(PI) / n;
142
+ } else {
143
+ two_atan_nbyw_by_n = -Scalar(PI) / n;
144
+ }
145
+ } else {
146
+ two_atan_nbyw_by_n = Scalar(2) * atan(n / w) / n;
147
+ }
148
+ }
149
+
150
+ return two_atan_nbyw_by_n * unit_quaternion.vec();
151
+ }
152
+
153
+ EIGEN_DEVICE_FUNC static SO3<Scalar> Exp(Tangent const& phi) {
154
+ Scalar theta2 = phi.squaredNorm();
155
+ Scalar theta = sqrt(theta2);
156
+ Scalar imag_factor;
157
+ Scalar real_factor;
158
+
159
+ if (theta < EPS) {
160
+ Scalar theta4 = theta2 * theta2;
161
+ imag_factor = Scalar(0.5) - Scalar(1.0/48.0) * theta2 + Scalar(1.0/3840.0) * theta4;
162
+ real_factor = Scalar(1) - Scalar(1.0/8.0) * theta2 + Scalar(1.0/384.0) * theta4;
163
+ } else {
164
+ imag_factor = sin(.5 * theta) / theta;
165
+ real_factor = cos(.5 * theta);
166
+ }
167
+
168
+ Quaternion q(real_factor, imag_factor*phi.x(), imag_factor*phi.y(), imag_factor*phi.z());
169
+ return SO3<Scalar>(q);
170
+ }
171
+
172
+ EIGEN_DEVICE_FUNC static Adjoint left_jacobian(Tangent const& phi) {
173
+ // left jacobian
174
+ Matrix3 I = Matrix3::Identity();
175
+ Matrix3 Phi = SO3<Scalar>::hat(phi);
176
+ Matrix3 Phi2 = Phi * Phi;
177
+
178
+ Scalar theta2 = phi.squaredNorm();
179
+ Scalar theta = sqrt(theta2);
180
+
181
+ Scalar coef1 = (theta < EPS) ?
182
+ Scalar(1.0/2.0) - Scalar(1.0/24.0) * theta2 :
183
+ (1.0 - cos(theta)) / theta2;
184
+
185
+ Scalar coef2 = (theta < EPS) ?
186
+ Scalar(1.0/6.0) - Scalar(1.0/120.0) * theta2 :
187
+ (theta - sin(theta)) / (theta2 * theta);
188
+
189
+ return I + coef1 * Phi + coef2 * Phi2;
190
+ }
191
+
192
+ EIGEN_DEVICE_FUNC static Adjoint left_jacobian_inverse(Tangent const& phi) {
193
+ // left jacobian inverse
194
+ Matrix3 I = Matrix3::Identity();
195
+ Matrix3 Phi = SO3<Scalar>::hat(phi);
196
+ Matrix3 Phi2 = Phi * Phi;
197
+
198
+ Scalar theta2 = phi.squaredNorm();
199
+ Scalar theta = sqrt(theta2);
200
+ Scalar half_theta = Scalar(.5) * theta ;
201
+
202
+ Scalar coef2 = (theta < EPS) ? Scalar(1.0/12.0) :
203
+ (Scalar(1) -
204
+ theta * cos(half_theta) / (Scalar(2) * sin(half_theta))) /
205
+ (theta * theta);
206
+
207
+ return I + Scalar(-0.5) * Phi + coef2 * Phi2;
208
+ }
209
+
210
+ EIGEN_DEVICE_FUNC static Eigen::Matrix<Scalar,3,3> act_jacobian(Point const& p) {
211
+ // jacobian action on a point
212
+ return SO3<Scalar>::hat(-p);
213
+ }
214
+
215
+ EIGEN_DEVICE_FUNC static Eigen::Matrix<Scalar,4,3> act4_jacobian(Point4 const& p) {
216
+ // jacobian action on a point
217
+ Eigen::Matrix<Scalar,4,3> J = Eigen::Matrix<Scalar,4,3>::Zero();
218
+ J.template block<3,3>(0,0) = SO3<Scalar>::hat(-p.template segment<3>(0));
219
+ return J;
220
+ }
221
+
222
+ private:
223
+ Quaternion unit_quaternion;
224
+
225
+ };
226
+
227
+ #endif
228
+
229
+
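Note (editor's illustration, not code from this commit): SO3 stores a unit quaternion and switches to Taylor expansions near theta = 0 in Exp, Log and the left Jacobians. The sketch below re-derives the quaternion exponential with the same branch structure and the same series coefficients as SO3::Exp; the EPS value is an assumption here, since the real threshold comes from common.h.

import numpy as np

EPS = 1e-6  # assumed value; the header reads EPS from common.h

def so3_exp_quaternion(phi):
    """Axis-angle vector phi -> unit quaternion (w, x, y, z), mirroring SO3::Exp."""
    theta2 = float(np.dot(phi, phi))
    theta = np.sqrt(theta2)
    if theta < EPS:
        # small-angle Taylor series, same coefficients as the header
        theta4 = theta2 * theta2
        imag = 0.5 - theta2 / 48.0 + theta4 / 3840.0
        real = 1.0 - theta2 / 8.0 + theta4 / 384.0
    else:
        imag = np.sin(0.5 * theta) / theta
        real = np.cos(0.5 * theta)
    return np.concatenate([[real], imag * phi])

q = so3_exp_quaternion(np.array([0.3, -0.2, 0.1]))
assert abs(np.linalg.norm(q) - 1.0) < 1e-12   # exp always lands on the unit quaternion sphere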
third-party/DPVO/dpvo/lietorch/run_tests.py ADDED
@@ -0,0 +1,302 @@
1
+ import torch
2
+ import lietorch
3
+
4
+ from lietorch import SO3, RxSO3, SE3, Sim3
5
+ from gradcheck import gradcheck, get_analytical_jacobian
6
+
7
+
8
+ ### forward tests ###
9
+
10
+ def make_homogeneous(p):
11
+ return torch.cat([p, torch.ones_like(p[...,:1])], dim=-1)
12
+
13
+ def matv(A, b):
14
+ return torch.matmul(A, b[...,None])[..., 0]
15
+
16
+ def test_exp_log(Group, device='cuda'):
17
+ """ check Log(Exp(x)) == x """
18
+ a = .2*torch.randn(2,3,4,5,6,7,Group.manifold_dim, device=device).double()
19
+ b = Group.exp(a).log()
20
+ assert torch.allclose(a,b,atol=1e-8), "should be identity"
21
+ print("\t-", Group, "Passed exp-log test")
22
+
23
+ def test_inv(Group, device='cuda'):
24
+ """ check X * X^{-1} == 0 """
25
+ X = Group.exp(.1*torch.randn(2,3,4,5,Group.manifold_dim, device=device).double())
26
+ a = (X * X.inv()).log()
27
+ assert torch.allclose(a, torch.zeros_like(a), atol=1e-8), "should be 0"
28
+ print("\t-", Group, "Passed inv test")
29
+
30
+ def test_adj(Group, device='cuda'):
31
+ """ check X * Exp(a) == Exp(Adj(X,a)) * X 0 """
32
+ X = Group.exp(torch.randn(2,3,4,5, Group.manifold_dim, device=device).double())
33
+ a = torch.randn(2,3,4,5, Group.manifold_dim, device=device).double()
34
+
35
+ b = X.adj(a)
36
+ Y1 = X * Group.exp(a)
37
+ Y2 = Group.exp(b) * X
38
+
39
+ c = (Y1 * Y2.inv()).log()
40
+ assert torch.allclose(c, torch.zeros_like(c), atol=1e-8), "should be 0"
41
+ print("\t-", Group, "Passed adj test")
42
+
43
+
44
+ def test_act(Group, device='cuda'):
45
+ X = Group.exp(torch.randn(1, Group.manifold_dim, device=device).double())
46
+ p = torch.randn(1,3,device=device).double()
47
+
48
+ p1 = X.act(p)
49
+ p2 = matv(X.matrix(), make_homogeneous(p))
50
+
51
+ assert torch.allclose(p1, p2[...,:3], atol=1e-8), "should be 0"
52
+ print("\t-", Group, "Passed act test")
53
+
54
+
55
+ ### backward tests ###
56
+ def test_exp_log_grad(Group, device='cuda', tol=1e-8):
57
+
58
+ D = Group.manifold_dim
59
+
60
+ def fn(a):
61
+ return Group.exp(a).log()
62
+
63
+ a = torch.zeros(1, Group.manifold_dim, requires_grad=True, device=device).double()
64
+ analytical, reentrant, correct_grad_sizes, correct_grad_types = \
65
+ get_analytical_jacobian((a,), fn(a))
66
+
67
+ assert torch.allclose(analytical[0], torch.eye(D, device=device).double(), atol=tol)
68
+
69
+ a = .2 * torch.randn(1, Group.manifold_dim, requires_grad=True, device=device).double()
70
+ analytical, reentrant, correct_grad_sizes, correct_grad_types = \
71
+ get_analytical_jacobian((a,), fn(a))
72
+
73
+ assert torch.allclose(analytical[0], torch.eye(D, device=device).double(), atol=tol)
74
+
75
+ print("\t-", Group, "Passed eye-grad test")
76
+
77
+
78
+ def test_inv_log_grad(Group, device='cuda', tol=1e-8):
79
+
80
+ D = Group.manifold_dim
81
+ X = Group.exp(.2*torch.randn(1,D,device=device).double())
82
+
83
+ def fn(a):
84
+ return (Group.exp(a) * X).inv().log()
85
+
86
+ a = torch.zeros(1, D, requires_grad=True, device=device).double()
87
+ analytical, numerical = gradcheck(fn, [a], eps=1e-4)
88
+
89
+ # assert torch.allclose(analytical[0], numerical[0], atol=tol)
90
+ if not torch.allclose(analytical[0], numerical[0], atol=tol):
91
+ print(analytical[0])
92
+ print(numerical[0])
93
+
94
+ print("\t-", Group, "Passed inv-grad test")
95
+
96
+
97
+ def test_adj_grad(Group, device='cuda'):
98
+ D = Group.manifold_dim
99
+ X = Group.exp(.5*torch.randn(1,Group.manifold_dim, device=device).double())
100
+
101
+ def fn(a, b):
102
+ return (Group.exp(a) * X).adj(b)
103
+
104
+ a = torch.zeros(1, D, requires_grad=True, device=device).double()
105
+ b = torch.randn(1, D, requires_grad=True, device=device).double()
106
+
107
+ analytical, numerical = gradcheck(fn, [a, b], eps=1e-4)
108
+ assert torch.allclose(analytical[0], numerical[0], atol=1e-8)
109
+ assert torch.allclose(analytical[1], numerical[1], atol=1e-8)
110
+
111
+ print("\t-", Group, "Passed adj-grad test")
112
+
113
+
114
+ def test_adjT_grad(Group, device='cuda'):
115
+ D = Group.manifold_dim
116
+ X = Group.exp(.5*torch.randn(1,Group.manifold_dim, device=device).double())
117
+
118
+ def fn(a, b):
119
+ return (Group.exp(a) * X).adjT(b)
120
+
121
+ a = torch.zeros(1, D, requires_grad=True, device=device).double()
122
+ b = torch.randn(1, D, requires_grad=True, device=device).double()
123
+
124
+ analytical, numerical = gradcheck(fn, [a, b], eps=1e-4)
125
+
126
+ assert torch.allclose(analytical[0], numerical[0], atol=1e-8)
127
+ assert torch.allclose(analytical[1], numerical[1], atol=1e-8)
128
+
129
+ print("\t-", Group, "Passed adjT-grad test")
130
+
131
+
132
+ def test_act_grad(Group, device='cuda'):
133
+ D = Group.manifold_dim
134
+ X = Group.exp(5*torch.randn(1,D, device=device).double())
135
+
136
+ def fn(a, b):
137
+ return (X*Group.exp(a)).act(b)
138
+
139
+ a = torch.zeros(1, D, requires_grad=True, device=device).double()
140
+ b = torch.randn(1, 3, requires_grad=True, device=device).double()
141
+
142
+ analytical, numerical = gradcheck(fn, [a, b], eps=1e-4)
143
+
144
+ assert torch.allclose(analytical[0], numerical[0], atol=1e-8)
145
+ assert torch.allclose(analytical[1], numerical[1], atol=1e-8)
146
+
147
+ print("\t-", Group, "Passed act-grad test")
148
+
149
+
150
+ def test_matrix_grad(Group, device='cuda'):
151
+ D = Group.manifold_dim
152
+ X = Group.exp(torch.randn(1, D, device=device).double())
153
+
154
+ def fn(a):
155
+ return (Group.exp(a) * X).matrix()
156
+
157
+ a = torch.zeros(1, D, requires_grad=True, device=device).double()
158
+ analytical, numerical = gradcheck(fn, [a], eps=1e-4)
159
+ assert torch.allclose(analytical[0], numerical[0], atol=1e-6)
160
+
161
+ print("\t-", Group, "Passed matrix-grad test")
162
+
163
+
164
+ def extract_translation_grad(Group, device='cuda'):
165
+ """ prototype function """
166
+
167
+ D = Group.manifold_dim
168
+ X = Group.exp(5*torch.randn(1,D, device=device).double())
169
+
170
+ def fn(a):
171
+ return (Group.exp(a)*X).translation()
172
+
173
+ a = torch.zeros(1, D, requires_grad=True, device=device).double()
174
+
175
+ analytical, numerical = gradcheck(fn, [a], eps=1e-4)
176
+
177
+ assert torch.allclose(analytical[0], numerical[0], atol=1e-8)
178
+ print("\t-", Group, "Passed translation grad test")
179
+
180
+
181
+ def test_vec_grad(Group, device='cuda', tol=1e-6):
182
+
183
+ D = Group.manifold_dim
184
+ X = Group.exp(5*torch.randn(1,D, device=device).double())
185
+
186
+ def fn(a):
187
+ return (Group.exp(a)*X).vec()
188
+
189
+ a = torch.zeros(1, D, requires_grad=True, device=device).double()
190
+
191
+ analytical, numerical = gradcheck(fn, [a], eps=1e-4)
192
+
193
+ assert torch.allclose(analytical[0], numerical[0], atol=tol)
194
+ print("\t-", Group, "Passed tovec grad test")
195
+
196
+
197
+ def test_fromvec_grad(Group, device='cuda', tol=1e-6):
198
+
199
+ def fn(a):
200
+ if Group == SO3:
201
+ a = a / a.norm(dim=-1, keepdim=True)
202
+
203
+ elif Group == RxSO3:
204
+ q, s = a.split([4, 1], dim=-1)
205
+ q = q / q.norm(dim=-1, keepdim=True)
206
+ a = torch.cat([q, s.exp()], dim=-1)
207
+
208
+ elif Group == SE3:
209
+ t, q = a.split([3, 4], dim=-1)
210
+ q = q / q.norm(dim=-1, keepdim=True)
211
+ a = torch.cat([t, q], dim=-1)
212
+
213
+ elif Group == Sim3:
214
+ t, q, s = a.split([3, 4, 1], dim=-1)
215
+ q = q / q.norm(dim=-1, keepdim=True)
216
+ a = torch.cat([t, q, s.exp()], dim=-1)
217
+
218
+ return Group.InitFromVec(a).vec()
219
+
220
+ D = Group.embedded_dim
221
+ a = torch.randn(1, 2, D, requires_grad=True, device=device).double()
222
+
223
+ analytical, numerical = gradcheck(fn, [a], eps=1e-4)
224
+
225
+ assert torch.allclose(analytical[0], numerical[0], atol=tol)
226
+ print("\t-", Group, "Passed fromvec grad test")
227
+
228
+
229
+
230
+ def scale(device='cuda'):
231
+
232
+ def fn(a, s):
233
+ X = SE3.exp(a)
234
+ X.scale(s)
235
+ return X.log()
236
+
237
+ s = torch.rand(1, requires_grad=True, device=device).double()
238
+ a = torch.randn(1, 6, requires_grad=True, device=device).double()
239
+
240
+ analytical, numerical = gradcheck(fn, [a, s], eps=1e-3)
241
+ print(analytical[1])
242
+ print(numerical[1])
243
+
244
+
245
+ assert torch.allclose(analytical[0], numerical[0], atol=1e-8)
246
+ assert torch.allclose(analytical[1], numerical[1], atol=1e-8)
247
+
248
+ print("\t-", "Passed se3-to-sim3 test")
249
+
250
+
251
+ if __name__ == '__main__':
252
+
253
+
254
+ print("Testing lietorch forward pass (CPU) ...")
255
+ for Group in [SO3, RxSO3, SE3, Sim3]:
256
+ test_exp_log(Group, device='cpu')
257
+ test_inv(Group, device='cpu')
258
+ test_adj(Group, device='cpu')
259
+ test_act(Group, device='cpu')
260
+
261
+ print("Testing lietorch backward pass (CPU)...")
262
+ for Group in [SO3, RxSO3, SE3, Sim3]:
263
+ if Group == Sim3:
264
+ tol = 1e-3
265
+ else:
266
+ tol = 1e-8
267
+
268
+ test_exp_log_grad(Group, device='cpu', tol=tol)
269
+ test_inv_log_grad(Group, device='cpu', tol=tol)
270
+ test_adj_grad(Group, device='cpu')
271
+ test_adjT_grad(Group, device='cpu')
272
+ test_act_grad(Group, device='cpu')
273
+ test_matrix_grad(Group, device='cpu')
274
+ extract_translation_grad(Group, device='cpu')
275
+ test_vec_grad(Group, device='cpu')
276
+ test_fromvec_grad(Group, device='cpu')
277
+
278
+ print("Testing lietorch forward pass (GPU) ...")
279
+ for Group in [SO3, RxSO3, SE3, Sim3]:
280
+ test_exp_log(Group, device='cuda')
281
+ test_inv(Group, device='cuda')
282
+ test_adj(Group, device='cuda')
283
+ test_act(Group, device='cuda')
284
+
285
+ print("Testing lietorch backward pass (GPU)...")
286
+ for Group in [SO3, RxSO3, SE3, Sim3]:
287
+ if Group == Sim3:
288
+ tol = 1e-3
289
+ else:
290
+ tol = 1e-8
291
+
292
+ test_exp_log_grad(Group, device='cuda', tol=tol)
293
+ test_inv_log_grad(Group, device='cuda', tol=tol)
294
+ test_adj_grad(Group, device='cuda')
295
+ test_adjT_grad(Group, device='cuda')
296
+ test_act_grad(Group, device='cuda')
297
+ test_matrix_grad(Group, device='cuda')
298
+ extract_translation_grad(Group, device='cuda')
299
+ test_vec_grad(Group, device='cuda')
300
+ test_fromvec_grad(Group, device='cuda')
301
+
302
+
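Note (editor's sketch): run_tests.py is also the clearest reference for the Python API surface (exp, log, inv, adj, adjT, act, matrix, vec/InitFromVec). Every gradient test uses the same pattern: perturb a fixed element X on the left by exp(a) and differentiate at a = 0, so the Jacobians live in the local tangent space of X. A stripped-down version of that pattern, assuming the extension is built and the script is run from this directory so the local gradcheck module resolves:

import torch
from lietorch import SE3
from gradcheck import gradcheck   # local module in dpvo/lietorch

X = SE3.exp(0.2 * torch.randn(1, 6).double())   # fixed group element

def fn(a):
    # left-perturbation of X; gradients are evaluated at a = 0
    return (SE3.exp(a) * X).log()

a = torch.zeros(1, 6, requires_grad=True).double()
analytical, numerical = gradcheck(fn, [a], eps=1e-4)
print(torch.allclose(analytical[0], numerical[0], atol=1e-8))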
third-party/DPVO/dpvo/lietorch/src/lietorch.cpp ADDED
@@ -0,0 +1,317 @@
1
+ #include <torch/extension.h>
2
+ #include <vector>
3
+ #include "lietorch_gpu.h"
4
+ #include "lietorch_cpu.h"
5
+
6
+
7
+ #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
8
+
9
+
10
+ /* Interface for cuda and c++ group operations
11
+
12
+ enum group_t { SO3=1, SE3=2, Sim3=3 };
13
+ X, Y, Z: (uppercase) Lie Group Elements
14
+ a, b, c: (lowercase) Lie Algebra Elements
15
+ */
16
+
17
+ // Unary operations
18
+ torch::Tensor expm(int group_index, torch::Tensor a) {
19
+ CHECK_CONTIGUOUS(a);
20
+ if (a.device().type() == torch::DeviceType::CPU) {
21
+ return exp_forward_cpu(group_index, a);
22
+
23
+ } else if (a.device().type() == torch::DeviceType::CUDA) {
24
+ return exp_forward_gpu(group_index, a);
25
+ }
26
+
27
+ return a;
28
+ }
29
+
30
+ std::vector<torch::Tensor> expm_backward(int group_index, torch::Tensor grad, torch::Tensor a) {
31
+ CHECK_CONTIGUOUS(a);
32
+ CHECK_CONTIGUOUS(grad);
33
+ if (a.device().type() == torch::DeviceType::CPU) {
34
+ return exp_backward_cpu(group_index, grad, a);
35
+
36
+ } else if (a.device().type() == torch::DeviceType::CUDA) {
37
+ return exp_backward_gpu(group_index, grad, a);
38
+ }
39
+
40
+ return {};
41
+ }
42
+
43
+ torch::Tensor logm(int group_index, torch::Tensor X) {
44
+ CHECK_CONTIGUOUS(X);
45
+ if (X.device().type() == torch::DeviceType::CPU) {
46
+ return log_forward_cpu(group_index, X);
47
+
48
+ } else if (X.device().type() == torch::DeviceType::CUDA) {
49
+ return log_forward_gpu(group_index, X);
50
+ }
51
+
52
+ return X;
53
+ }
54
+
55
+ std::vector<torch::Tensor> logm_backward(int group_index, torch::Tensor grad, torch::Tensor X) {
56
+ CHECK_CONTIGUOUS(X);
57
+ CHECK_CONTIGUOUS(grad);
58
+
59
+ if (X.device().type() == torch::DeviceType::CPU) {
60
+ return log_backward_cpu(group_index, grad, X);
61
+
62
+ } else if (X.device().type() == torch::DeviceType::CUDA) {
63
+ return log_backward_gpu(group_index, grad, X);
64
+ }
65
+
66
+ return {};
67
+ }
68
+
69
+ torch::Tensor inv(int group_index, torch::Tensor X) {
70
+ CHECK_CONTIGUOUS(X);
71
+
72
+ if (X.device().type() == torch::DeviceType::CPU) {
73
+ return inv_forward_cpu(group_index, X);
74
+ } else if (X.device().type() == torch::DeviceType::CUDA) {
75
+ return inv_forward_gpu(group_index, X);
76
+ }
77
+
78
+ return X;
79
+ }
80
+
81
+ std::vector<torch::Tensor> inv_backward(int group_index, torch::Tensor grad, torch::Tensor X) {
82
+ CHECK_CONTIGUOUS(X);
83
+ CHECK_CONTIGUOUS(grad);
84
+
85
+ if (X.device().type() == torch::DeviceType::CPU) {
86
+ return inv_backward_cpu(group_index, grad, X);
87
+
88
+ } else if (X.device().type() == torch::DeviceType::CUDA) {
89
+ return inv_backward_gpu(group_index, grad, X);
90
+ }
91
+
92
+ return {};
93
+ }
94
+
95
+ // Binary operations
96
+
97
+ torch::Tensor mul(int group_index, torch::Tensor X, torch::Tensor Y) {
98
+ CHECK_CONTIGUOUS(X);
99
+ CHECK_CONTIGUOUS(Y);
100
+
101
+ if (X.device().type() == torch::DeviceType::CPU) {
102
+ return mul_forward_cpu(group_index, X, Y);
103
+
104
+ } else if (X.device().type() == torch::DeviceType::CUDA) {
105
+ return mul_forward_gpu(group_index, X, Y);
106
+ }
107
+
108
+ return X;
109
+ }
110
+
111
+ std::vector<torch::Tensor> mul_backward(int group_index, torch::Tensor grad, torch::Tensor X, torch::Tensor Y) {
112
+ CHECK_CONTIGUOUS(X);
113
+ CHECK_CONTIGUOUS(Y);
114
+ CHECK_CONTIGUOUS(grad);
115
+
116
+ if (X.device().type() == torch::DeviceType::CPU) {
117
+ return mul_backward_cpu(group_index, grad, X, Y);
118
+
119
+ } else if (X.device().type() == torch::DeviceType::CUDA) {
120
+ return mul_backward_gpu(group_index, grad, X, Y);
121
+ }
122
+
123
+ return {};
124
+ }
125
+
126
+ torch::Tensor adj(int group_index, torch::Tensor X, torch::Tensor a) {
127
+ CHECK_CONTIGUOUS(X);
128
+ CHECK_CONTIGUOUS(a);
129
+
130
+ if (X.device().type() == torch::DeviceType::CPU) {
131
+ return adj_forward_cpu(group_index, X, a);
132
+
133
+ } else if (X.device().type() == torch::DeviceType::CUDA) {
134
+ return adj_forward_gpu(group_index, X, a);
135
+ }
136
+
137
+ return X;
138
+ }
139
+
140
+ std::vector<torch::Tensor> adj_backward(int group_index, torch::Tensor grad, torch::Tensor X, torch::Tensor a) {
141
+ CHECK_CONTIGUOUS(X);
142
+ CHECK_CONTIGUOUS(a);
143
+ CHECK_CONTIGUOUS(grad);
144
+
145
+ if (X.device().type() == torch::DeviceType::CPU) {
146
+ return adj_backward_cpu(group_index, grad, X, a);
147
+
148
+ } else if (X.device().type() == torch::DeviceType::CUDA) {
149
+ return adj_backward_gpu(group_index, grad, X, a);
150
+ }
151
+
152
+ return {};
153
+ }
154
+
155
+ torch::Tensor adjT(int group_index, torch::Tensor X, torch::Tensor a) {
156
+ CHECK_CONTIGUOUS(X);
157
+ CHECK_CONTIGUOUS(a);
158
+
159
+ if (X.device().type() == torch::DeviceType::CPU) {
160
+ return adjT_forward_cpu(group_index, X, a);
161
+
162
+ } else if (X.device().type() == torch::DeviceType::CUDA) {
163
+ return adjT_forward_gpu(group_index, X, a);
164
+ }
165
+
166
+ return X;
167
+ }
168
+
169
+ std::vector<torch::Tensor> adjT_backward(int group_index, torch::Tensor grad, torch::Tensor X, torch::Tensor a) {
170
+ CHECK_CONTIGUOUS(X);
171
+ CHECK_CONTIGUOUS(a);
172
+ CHECK_CONTIGUOUS(grad);
173
+
174
+ if (X.device().type() == torch::DeviceType::CPU) {
175
+ return adjT_backward_cpu(group_index, grad, X, a);
176
+
177
+ } else if (X.device().type() == torch::DeviceType::CUDA) {
178
+ return adjT_backward_gpu(group_index, grad, X, a);
179
+ }
180
+
181
+ return {};
182
+ }
183
+
184
+
185
+ torch::Tensor act(int group_index, torch::Tensor X, torch::Tensor p) {
186
+ CHECK_CONTIGUOUS(X);
187
+ CHECK_CONTIGUOUS(p);
188
+
189
+ if (X.device().type() == torch::DeviceType::CPU) {
190
+ return act_forward_cpu(group_index, X, p);
191
+
192
+ } else if (X.device().type() == torch::DeviceType::CUDA) {
193
+ return act_forward_gpu(group_index, X, p);
194
+ }
195
+
196
+ return X;
197
+ }
198
+
199
+ std::vector<torch::Tensor> act_backward(int group_index, torch::Tensor grad, torch::Tensor X, torch::Tensor p) {
200
+ CHECK_CONTIGUOUS(X);
201
+ CHECK_CONTIGUOUS(p);
202
+ CHECK_CONTIGUOUS(grad);
203
+
204
+ if (X.device().type() == torch::DeviceType::CPU) {
205
+ return act_backward_cpu(group_index, grad, X, p);
206
+
207
+ } else if (X.device().type() == torch::DeviceType::CUDA) {
208
+ return act_backward_gpu(group_index, grad, X, p);
209
+ }
210
+
211
+ return {};
212
+ }
213
+
214
+ torch::Tensor act4(int group_index, torch::Tensor X, torch::Tensor p) {
215
+ CHECK_CONTIGUOUS(X);
216
+ CHECK_CONTIGUOUS(p);
217
+
218
+ if (X.device().type() == torch::DeviceType::CPU) {
219
+ return act4_forward_cpu(group_index, X, p);
220
+
221
+ } else if (X.device().type() == torch::DeviceType::CUDA) {
222
+ return act4_forward_gpu(group_index, X, p);
223
+ }
224
+
225
+ return X;
226
+ }
227
+
228
+ std::vector<torch::Tensor> act4_backward(int group_index, torch::Tensor grad, torch::Tensor X, torch::Tensor p) {
229
+ CHECK_CONTIGUOUS(X);
230
+ CHECK_CONTIGUOUS(p);
231
+ CHECK_CONTIGUOUS(grad);
232
+
233
+ if (X.device().type() == torch::DeviceType::CPU) {
234
+ return act4_backward_cpu(group_index, grad, X, p);
235
+
236
+ } else if (X.device().type() == torch::DeviceType::CUDA) {
237
+ return act4_backward_gpu(group_index, grad, X, p);
238
+ }
239
+
240
+ return {};
241
+ }
242
+
243
+
244
+ torch::Tensor projector(int group_index, torch::Tensor X) {
245
+ CHECK_CONTIGUOUS(X);
246
+
247
+ if (X.device().type() == torch::DeviceType::CPU) {
248
+ return orthogonal_projector_cpu(group_index, X);
249
+
250
+ } else if (X.device().type() == torch::DeviceType::CUDA) {
251
+ return orthogonal_projector_gpu(group_index, X);
252
+ }
253
+
254
+ return X;
255
+ }
256
+
257
+
258
+ torch::Tensor as_matrix(int group_index, torch::Tensor X) {
259
+ CHECK_CONTIGUOUS(X);
260
+
261
+ if (X.device().type() == torch::DeviceType::CPU) {
262
+ return as_matrix_forward_cpu(group_index, X);
263
+
264
+ } else if (X.device().type() == torch::DeviceType::CUDA) {
265
+ return as_matrix_forward_gpu(group_index, X);
266
+ }
267
+
268
+ return X;
269
+ }
270
+
271
+ torch::Tensor Jinv(int group_index, torch::Tensor X, torch::Tensor a) {
272
+ CHECK_CONTIGUOUS(X);
273
+ CHECK_CONTIGUOUS(a);
274
+
275
+ if (X.device().type() == torch::DeviceType::CPU) {
276
+ return jleft_forward_cpu(group_index, X, a);
277
+
278
+ } else if (X.device().type() == torch::DeviceType::CUDA) {
279
+ return jleft_forward_gpu(group_index, X, a);
280
+ }
281
+
282
+ return a;
283
+ }
284
+
285
+ // {exp, log, inv, mul, adj, adjT, act, act4} forward/backward bindings
286
+ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
287
+ m.def("expm", &expm, "exp map forward");
288
+ m.def("expm_backward", &expm_backward, "exp map backward");
289
+
290
+ m.def("logm", &logm, "log map forward");
291
+ m.def("logm_backward", &logm_backward, "log map backward");
292
+
293
+ m.def("inv", &inv, "inverse operator");
294
+ m.def("inv_backward", &inv_backward, "inverse operator backward");
295
+
296
+ m.def("mul", &mul, "group operator");
297
+ m.def("mul_backward", &mul_backward, "group operator backward");
298
+
299
+ m.def("adj", &adj, "adjoint operator");
300
+ m.def("adj_backward", &adj_backward, "adjoint operator backward");
301
+
302
+ m.def("adjT", &adjT, "transposed adjoint operator");
303
+ m.def("adjT_backward", &adjT_backward, "transposed adjoint operator backward");
304
+
305
+ m.def("act", &act, "action on point");
306
+ m.def("act_backward", &act_backward, "action on point backward");
307
+
308
+ m.def("act4", &act4, "action on homogeneous point");
309
+ m.def("act4_backward", &act4_backward, "action on homogeneous point backward");
310
+
311
+ // functions with no gradient
312
+ m.def("as_matrix", &as_matrix, "convert to matrix");
313
+ m.def("projector", &projector, "orthogonal projection matrix");
314
+ m.def("Jinv", &Jinv, "left inverse jacobian operator");
315
+
316
+ };
317
+
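Note (editor's sketch, assuming a CUDA-enabled build when a GPU is present): each binding above is a thin device dispatcher. It checks the tensor's device and forwards to the _cpu or _gpu implementation, so the Python layer never has to branch on device. At the call site that looks like this:

import torch
from lietorch import SO3

devices = ["cpu"] + (["cuda"] if torch.cuda.is_available() else [])
for device in devices:
    a = 0.1 * torch.randn(4, 3, device=device).double()
    X = SO3.exp(a)     # routed to exp_forward_cpu or exp_forward_gpu
    b = X.log()        # routed to log_forward_cpu or log_forward_gpu
    assert torch.allclose(a, b, atol=1e-8)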
third-party/DPVO/dpvo/lietorch/src/lietorch_cpu.cpp ADDED
@@ -0,0 +1,657 @@
1
+
2
+ #include "lietorch_cpu.h"
3
+ #include <Eigen/Dense>
4
+
5
+ #include <iostream>
6
+ #include "common.h"
7
+ #include "dispatch.h"
8
+
9
+ #include "so3.h"
10
+ #include "rxso3.h"
11
+ #include "se3.h"
12
+ #include "sim3.h"
13
+
14
+
15
+ template <typename Group, typename scalar_t>
16
+ void exp_forward_kernel(const scalar_t* a_ptr, scalar_t* X_ptr, int batch_size) {
17
+ // exponential map forward kernel
18
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
19
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
20
+
21
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
22
+ for (int64_t i=start; i<end; i++) {
23
+ Tangent a(a_ptr + i*Group::K);
24
+ Eigen::Map<Data>(X_ptr + i*Group::N) = Group::Exp(a).data();
25
+ }
26
+ });
27
+ }
28
+
29
+ template <typename Group, typename scalar_t>
30
+ void exp_backward_kernel(const scalar_t* grad, const scalar_t* a_ptr, scalar_t* da, int batch_size) {
31
+ // exponential map backward kernel
32
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
33
+ using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
34
+
35
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
36
+ for (int64_t i=start; i<end; i++) {
37
+ Tangent a(a_ptr + i*Group::K);
38
+ Grad dX(grad + i*Group::N);
39
+ Eigen::Map<Grad>(da + i*Group::K) = dX * Group::left_jacobian(a);
40
+ }
41
+ });
42
+ }
43
+
44
+ template <typename Group, typename scalar_t>
45
+ void log_forward_kernel(const scalar_t* X_ptr, scalar_t* a_ptr, int batch_size) {
46
+ // logarithm map forward kernel
47
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
48
+
49
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
50
+ for (int64_t i=start; i<end; i++) {
51
+ Tangent a = Group(X_ptr + i*Group::N).Log();
52
+ Eigen::Map<Tangent>(a_ptr + i*Group::K) = a;
53
+ }
54
+ });
55
+ }
56
+
57
+ template <typename Group, typename scalar_t>
58
+ void log_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, scalar_t* dX, int batch_size) {
59
+ // logarithm map backward kernel
60
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
61
+ using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
62
+
63
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
64
+ for (int64_t i=start; i<end; i++) {
65
+ Tangent a = Group(X_ptr + i*Group::N).Log();
66
+ Grad da(grad + i*Group::K);
67
+ Eigen::Map<Grad>(dX + i*Group::N) = da * Group::left_jacobian_inverse(a);
68
+ }
69
+ });
70
+ }
71
+
72
+ template <typename Group, typename scalar_t>
73
+ void inv_forward_kernel(const scalar_t* X_ptr, scalar_t* Y_ptr, int batch_size) {
74
+ // group inverse forward kernel
75
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
76
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
77
+
78
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
79
+ for (int64_t i=start; i<end; i++) {
80
+ Group X(X_ptr + i*Group::N);
81
+ Eigen::Map<Data>(Y_ptr + i*Group::N) = X.inv().data();
82
+ }
83
+ });
84
+ }
85
+
86
+ template <typename Group, typename scalar_t>
87
+ void inv_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, scalar_t *dX, int batch_size) {
88
+ // group inverse backward kernel
89
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
90
+ using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
91
+
92
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
93
+ for (int64_t i=start; i<end; i++) {
94
+ Group Y = Group(X_ptr + i*Group::N).inv();
95
+ Grad dY(grad + i*Group::N);
96
+ Eigen::Map<Grad>(dX + i*Group::N) = -dY * Y.Adj();
97
+ }
98
+ });
99
+ }
100
+
101
+ template <typename Group, typename scalar_t>
102
+ void mul_forward_kernel(const scalar_t* X_ptr, const scalar_t* Y_ptr, scalar_t* Z_ptr, int batch_size) {
103
+ // group multiplication forward kernel
104
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
105
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
106
+
107
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
108
+ for (int64_t i=start; i<end; i++) {
109
+ Group Z = Group(X_ptr + i*Group::N) * Group(Y_ptr + i*Group::N);
110
+ Eigen::Map<Data>(Z_ptr + i*Group::N) = Z.data();
111
+ }
112
+ });
113
+ }
114
+
115
+ template <class Group, typename scalar_t>
116
+ void mul_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* Y_ptr, scalar_t* dX, scalar_t* dY, int batch_size) {
117
+ // group multiplication backward kernel
118
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
119
+ using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
120
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
121
+
122
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
123
+ for (int64_t i=start; i<end; i++) {
124
+ Grad dZ(grad + i*Group::N);
125
+ Group X(X_ptr + i*Group::N);
126
+ Eigen::Map<Grad>(dX + i*Group::N) = dZ;
127
+ Eigen::Map<Grad>(dY + i*Group::N) = dZ * X.Adj();
128
+ }
129
+ });
130
+ }
131
+
132
+ template <typename Group, typename scalar_t>
133
+ void adj_forward_kernel(const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* b_ptr, int batch_size) {
134
+ // adjoint forward kernel
135
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
136
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
137
+
138
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
139
+ for (int64_t i=start; i<end; i++) {
140
+ Group X(X_ptr + i*Group::N);
141
+ Tangent a(a_ptr + i*Group::K);
142
+ Eigen::Map<Tangent>(b_ptr + i*Group::K) = X.Adj(a);
143
+ }
144
+ });
145
+ }
146
+
147
+ template <typename Group, typename scalar_t>
148
+ void adj_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* dX, scalar_t* da, int batch_size) {
149
+ // adjoint backward kernel
150
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
151
+ using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
152
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
153
+
154
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
155
+ for (int64_t i=start; i<end; i++) {
156
+ Group X(X_ptr + i*Group::N);
157
+ Grad db(grad + i*Group::K);
158
+
159
+ Tangent a(a_ptr + i*Group::K);
160
+ Tangent b = X.Adj() * a;
161
+
162
+ Eigen::Map<Grad>(da + i*Group::K) = db * X.Adj();
163
+ Eigen::Map<Grad>(dX + i*Group::N) = -db * Group::adj(b);
164
+ }
165
+ });
166
+ }
167
+
168
+ template <typename Group, typename scalar_t>
169
+ void adjT_forward_kernel(const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* b_ptr, int batch_size) {
170
+ // adjoint forward kernel
171
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
172
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
173
+
174
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
175
+ for (int64_t i=start; i<end; i++) {
176
+ Group X(X_ptr + i*Group::N);
177
+ Tangent a(a_ptr + i*Group::K);
178
+ Eigen::Map<Tangent>(b_ptr + i*Group::K) = X.AdjT(a);
179
+ }
180
+ });
181
+ }
182
+
183
+ template <typename Group, typename scalar_t>
184
+ void adjT_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* dX, scalar_t* da, int batch_size) {
185
+ // adjoint backward kernel
186
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
187
+ using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
188
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
189
+
190
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
191
+ for (int64_t i=start; i<end; i++) {
192
+ Group X(X_ptr + i*Group::N);
193
+ Tangent db(grad + i*Group::K);
194
+ Grad a(a_ptr + i*Group::K);
195
+
196
+ Eigen::Map<Tangent>(da + i*Group::K) = X.Adj(db);
197
+ Eigen::Map<Grad>(dX + i*Group::N) = -a * Group::adj(X.Adj(db));
198
+ }
199
+ });
200
+ }
201
+
202
+
203
+ template <typename Group, typename scalar_t>
204
+ void act_forward_kernel(const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* q_ptr, int batch_size) {
205
+ // action on point forward kernel
206
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
207
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
208
+ using Point = Eigen::Matrix<scalar_t,3,1>;
209
+
210
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
211
+ for (int64_t i=start; i<end; i++) {
212
+ Group X(X_ptr + i*Group::N);
213
+ Point p(p_ptr + i*3);
214
+ Eigen::Map<Point>(q_ptr + i*3) = X * p;
215
+ }
216
+ });
217
+ }
218
+
219
+ template <typename Group, typename scalar_t>
220
+ void act_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* dX, scalar_t* dp, int batch_size) {
221
+ // action on point backward kernel
222
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
223
+ using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
224
+ using Point = Eigen::Matrix<scalar_t,3,1>;
225
+ using PointGrad = Eigen::Matrix<scalar_t,1,3>;
226
+ using Transformation = Eigen::Matrix<scalar_t,4,4>;
227
+
228
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
229
+ for (int64_t i=start; i<end; i++) {
230
+ Group X(X_ptr + i*Group::N);
231
+ Point p(p_ptr + i*3);
232
+ PointGrad dq(grad + i*3);
233
+
234
+ Eigen::Map<PointGrad>(dp + i*3) = dq * X.Matrix().template block<3,3>(0,0);
235
+ Eigen::Map<Grad>(dX + i*Group::N) = dq * Group::act_jacobian(X*p);
236
+ }
237
+ });
238
+ }
239
+
240
+
241
+ // template <typename Group, typename scalar_t>
242
+ // void tovec_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, scalar_t* dX, int batch_size) {
243
+ // // group inverse forward kernel
244
+ // using Data = Eigen::Matrix<scalar_t,Group::N,1>;
245
+ // using Grad = Eigen::Matrix<scalar_t,1,Group::N>;
246
+
247
+ // at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
248
+ // for (int64_t i=start; i<end; i++) {
249
+ // Group X(X_ptr + i*Group::N);
250
+ // Grad g(grad + i*Group::N);
251
+ // Eigen::Map<Grad>(dX + i*Group::N) = g * X.vec_jacobian();
252
+ // }
253
+ // });
254
+ // }
255
+
256
+ // template <typename Group, typename scalar_t>
257
+ // void fromvec_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, scalar_t* dX, int batch_size) {
258
+ // // group inverse forward kernel
259
+ // using Data = Eigen::Matrix<scalar_t,Group::N,1>;
260
+ // using Grad = Eigen::Matrix<scalar_t,1,Group::N>;
261
+
262
+ // at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
263
+ // for (int64_t i=start; i<end; i++) {
264
+ // Group X(X_ptr + i*Group::N);
265
+ // Grad g(grad + i*Group::N);
266
+ // Eigen::Map<Grad>(dX + i*Group::N) = g * X.vec_jacobian();
267
+ // }
268
+ // });
269
+ // }
270
+
271
+
272
+ template <typename Group, typename scalar_t>
273
+ void act4_forward_kernel(const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* q_ptr, int batch_size) {
274
+ // action on homogeneous point forward kernel
275
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
276
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
277
+ using Point = Eigen::Matrix<scalar_t,4,1>;
278
+
279
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
280
+ for (int64_t i=start; i<end; i++) {
281
+ Group X(X_ptr + i*Group::N);
282
+ Point p(p_ptr + i*4);
283
+ Eigen::Map<Point>(q_ptr + i*4) = X.act4(p);
284
+ }
285
+ });
286
+ }
287
+
288
+ template <typename Group, typename scalar_t>
289
+ void act4_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* dX, scalar_t* dp, int batch_size) {
290
+ // action on homogeneous point backward kernel
291
+
292
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
293
+ using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
294
+ using Point = Eigen::Matrix<scalar_t,4,1>;
295
+ using PointGrad = Eigen::Matrix<scalar_t,1,4>;
296
+ using Transformation = Eigen::Matrix<scalar_t,4,4>;
297
+
298
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
299
+ for (int64_t i=start; i<end; i++) {
300
+ Group X(X_ptr + i*Group::N);
301
+ Point p(p_ptr + i*4);
302
+ PointGrad dq(grad + i*4);
303
+
304
+ Eigen::Map<PointGrad>(dp + i*4) = dq * X.Matrix4x4();
305
+ const Point q = X.act4(p);
306
+ Eigen::Map<Grad>(dX + i*Group::N) = dq * Group::act4_jacobian(q);
307
+ }
308
+ });
309
+ }
310
+
311
+ template <typename Group, typename scalar_t>
312
+ void as_matrix_forward_kernel(const scalar_t* X_ptr, scalar_t* T_ptr, int batch_size) {
313
+ // convert to 4x4 matrix forward kernel
314
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
315
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
316
+ using Matrix4 = Eigen::Matrix<scalar_t,4,4,Eigen::RowMajor>;
317
+
318
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
319
+ for (int64_t i=start; i<end; i++) {
320
+ Group X(X_ptr + i*Group::N);
321
+ Eigen::Map<Matrix4>(T_ptr + i*16) = X.Matrix4x4();
322
+ }
323
+ });
324
+ }
325
+
326
+ template <typename Group, typename scalar_t>
327
+ void orthogonal_projector_kernel(const scalar_t* X_ptr, scalar_t* P_ptr, int batch_size) {
328
+ // orthogonal projector kernel
329
+ using Proj = Eigen::Matrix<scalar_t,Group::N,Group::N,Eigen::RowMajor>;
330
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
331
+ for (int64_t i=start; i<end; i++) {
332
+ Group X(X_ptr + i*Group::N);
333
+ Eigen::Map<Proj>(P_ptr + i*Group::N*Group::N) = X.orthogonal_projector();
334
+ }
335
+ });
336
+ }
337
+
338
+ template <typename Group, typename scalar_t>
339
+ void jleft_forward_kernel(const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* b_ptr, int batch_size) {
340
+ // left-jacobian inverse action
341
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
342
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
343
+
344
+ at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
345
+ for (int64_t i=start; i<end; i++) {
346
+ Group X(X_ptr + i*Group::N);
347
+ Tangent a(a_ptr + i*Group::K);
348
+ Tangent b = Group::left_jacobian_inverse(X.Log()) * a;
349
+ Eigen::Map<Tangent>(b_ptr + i*Group::K) = b;
350
+ }
351
+ });
352
+ }
353
+
354
+ // unary operations
355
+
356
+ torch::Tensor exp_forward_cpu(int group_id, torch::Tensor a) {
357
+ int batch_size = a.size(0);
358
+ torch::Tensor X;
359
+
360
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, a.type(), "exp_forward_kernel", ([&] {
361
+ X = torch::zeros({batch_size, group_t::N}, a.options());
362
+ exp_forward_kernel<group_t, scalar_t>(
363
+ a.data_ptr<scalar_t>(),
364
+ X.data_ptr<scalar_t>(),
365
+ batch_size);
366
+ }));
367
+
368
+ return X;
369
+ }
370
+
371
+ std::vector<torch::Tensor> exp_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor a) {
372
+ int batch_size = a.size(0);
373
+ torch::Tensor da = torch::zeros(a.sizes(), grad.options());
374
+
375
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, a.type(), "exp_backward_kernel", ([&] {
376
+ exp_backward_kernel<group_t, scalar_t>(
377
+ grad.data_ptr<scalar_t>(),
378
+ a.data_ptr<scalar_t>(),
379
+ da.data_ptr<scalar_t>(),
380
+ batch_size);
381
+ }));
382
+
383
+ return {da};
384
+ }
385
+
386
+ torch::Tensor log_forward_cpu(int group_id, torch::Tensor X) {
387
+ int batch_size = X.size(0);
388
+ torch::Tensor a;
389
+
390
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "log_forward_kernel", ([&] {
391
+ a = torch::zeros({batch_size, group_t::K}, X.options());
392
+ log_forward_kernel<group_t, scalar_t>(
393
+ X.data_ptr<scalar_t>(),
394
+ a.data_ptr<scalar_t>(),
395
+ batch_size);
396
+ }));
397
+
398
+ return a;
399
+ }
400
+
401
+ std::vector<torch::Tensor> log_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X) {
402
+ int batch_size = X.size(0);
403
+ torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
404
+
405
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "log_backward_kernel", ([&] {
406
+ log_backward_kernel<group_t, scalar_t>(
407
+ grad.data_ptr<scalar_t>(),
408
+ X.data_ptr<scalar_t>(),
409
+ dX.data_ptr<scalar_t>(),
410
+ batch_size);
411
+ }));
412
+
413
+ return {dX};
414
+ }
415
+
416
+ torch::Tensor inv_forward_cpu(int group_id, torch::Tensor X) {
417
+ int batch_size = X.size(0);
418
+ torch::Tensor Y = torch::zeros_like(X);
419
+
420
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "inv_forward_kernel", ([&] {
421
+ inv_forward_kernel<group_t, scalar_t>(
422
+ X.data_ptr<scalar_t>(),
423
+ Y.data_ptr<scalar_t>(),
424
+ batch_size);
425
+ }));
426
+
427
+ return Y;
428
+ }
429
+
430
+ std::vector<torch::Tensor> inv_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X) {
431
+ int batch_size = X.size(0);
432
+ torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
433
+
434
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "inv_backward_kernel", ([&] {
435
+ inv_backward_kernel<group_t, scalar_t>(
436
+ grad.data_ptr<scalar_t>(),
437
+ X.data_ptr<scalar_t>(),
438
+ dX.data_ptr<scalar_t>(),
439
+ batch_size);
440
+ }));
441
+
442
+ return {dX};
443
+ }
444
+
445
+ // binary operations
446
+ torch::Tensor mul_forward_cpu(int group_id, torch::Tensor X, torch::Tensor Y) {
447
+ int batch_size = X.size(0);
448
+ torch::Tensor Z = torch::zeros_like(X);
449
+
450
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "mul_forward_kernel", ([&] {
451
+ mul_forward_kernel<group_t, scalar_t>(
452
+ X.data_ptr<scalar_t>(),
453
+ Y.data_ptr<scalar_t>(),
454
+ Z.data_ptr<scalar_t>(),
455
+ batch_size);
456
+ }));
457
+
458
+ return Z;
459
+ }
460
+
461
+ std::vector<torch::Tensor> mul_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor Y) {
462
+ int batch_size = X.size(0);
463
+ torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
464
+ torch::Tensor dY = torch::zeros(Y.sizes(), grad.options());
465
+
466
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "mul_backward_kernel", ([&] {
467
+ mul_backward_kernel<group_t, scalar_t>(
468
+ grad.data_ptr<scalar_t>(),
469
+ X.data_ptr<scalar_t>(),
470
+ Y.data_ptr<scalar_t>(),
471
+ dX.data_ptr<scalar_t>(),
472
+ dY.data_ptr<scalar_t>(),
473
+ batch_size);
474
+ }));
475
+
476
+ return {dX, dY};
477
+ }
478
+
479
+ torch::Tensor adj_forward_cpu(int group_id, torch::Tensor X, torch::Tensor a) {
480
+ int batch_size = X.size(0);
481
+ torch::Tensor b = torch::zeros(a.sizes(), a.options());
482
+
483
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adj_forward_kernel", ([&] {
484
+ adj_forward_kernel<group_t, scalar_t>(
485
+ X.data_ptr<scalar_t>(),
486
+ a.data_ptr<scalar_t>(),
487
+ b.data_ptr<scalar_t>(),
488
+ batch_size);
489
+ }));
490
+
491
+ return b;
492
+ }
493
+
494
+ std::vector<torch::Tensor> adj_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor a) {
495
+ int batch_size = X.size(0);
496
+ torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
497
+ torch::Tensor da = torch::zeros(a.sizes(), grad.options());
498
+
499
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adj_backward_kernel", ([&] {
500
+ adj_backward_kernel<group_t, scalar_t>(
501
+ grad.data_ptr<scalar_t>(),
502
+ X.data_ptr<scalar_t>(),
503
+ a.data_ptr<scalar_t>(),
504
+ dX.data_ptr<scalar_t>(),
505
+ da.data_ptr<scalar_t>(),
506
+ batch_size);
507
+ }));
508
+
509
+ return {dX, da};
510
+ }
511
+
512
+
513
+ torch::Tensor adjT_forward_cpu(int group_id, torch::Tensor X, torch::Tensor a) {
514
+ int batch_size = X.size(0);
515
+ torch::Tensor b = torch::zeros(a.sizes(), a.options());
516
+
517
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adjT_forward_kernel", ([&] {
518
+ adjT_forward_kernel<group_t, scalar_t>(
519
+ X.data_ptr<scalar_t>(),
520
+ a.data_ptr<scalar_t>(),
521
+ b.data_ptr<scalar_t>(),
522
+ batch_size);
523
+ }));
524
+
525
+ return b;
526
+ }
527
+
528
+ std::vector<torch::Tensor> adjT_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor a) {
529
+ int batch_size = X.size(0);
530
+ torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
531
+ torch::Tensor da = torch::zeros(a.sizes(), grad.options());
532
+
533
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adjT_backward_kernel", ([&] {
534
+ adjT_backward_kernel<group_t, scalar_t>(
535
+ grad.data_ptr<scalar_t>(),
536
+ X.data_ptr<scalar_t>(),
537
+ a.data_ptr<scalar_t>(),
538
+ dX.data_ptr<scalar_t>(),
539
+ da.data_ptr<scalar_t>(),
540
+ batch_size);
541
+ }));
542
+
543
+ return {dX, da};
544
+ }
545
+
546
+
547
+ torch::Tensor act_forward_cpu(int group_id, torch::Tensor X, torch::Tensor p) {
548
+ int batch_size = X.size(0);
549
+ torch::Tensor q = torch::zeros(p.sizes(), p.options());
550
+
551
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act_forward_kernel", ([&] {
552
+ act_forward_kernel<group_t, scalar_t>(
553
+ X.data_ptr<scalar_t>(),
554
+ p.data_ptr<scalar_t>(),
555
+ q.data_ptr<scalar_t>(),
556
+ batch_size);
557
+ }));
558
+
559
+ return q;
560
+ }
561
+
562
+ std::vector<torch::Tensor> act_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor p) {
563
+ int batch_size = X.size(0);
564
+ torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
565
+ torch::Tensor dp = torch::zeros(p.sizes(), grad.options());
566
+
567
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act_backward_kernel", ([&] {
568
+ act_backward_kernel<group_t, scalar_t>(
569
+ grad.data_ptr<scalar_t>(),
570
+ X.data_ptr<scalar_t>(),
571
+ p.data_ptr<scalar_t>(),
572
+ dX.data_ptr<scalar_t>(),
573
+ dp.data_ptr<scalar_t>(),
574
+ batch_size);
575
+ }));
576
+
577
+ return {dX, dp};
578
+ }
579
+
580
+
581
+ torch::Tensor act4_forward_cpu(int group_id, torch::Tensor X, torch::Tensor p) {
582
+ int batch_size = X.size(0);
583
+ torch::Tensor q = torch::zeros(p.sizes(), p.options());
584
+
585
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act4_forward_kernel", ([&] {
586
+ act4_forward_kernel<group_t, scalar_t>(
587
+ X.data_ptr<scalar_t>(),
588
+ p.data_ptr<scalar_t>(),
589
+ q.data_ptr<scalar_t>(),
590
+ batch_size);
591
+ }));
592
+
593
+ return q;
594
+ }
595
+
596
+ std::vector<torch::Tensor> act4_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor p) {
597
+ int batch_size = X.size(0);
598
+ torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
599
+ torch::Tensor dp = torch::zeros(p.sizes(), grad.options());
600
+
601
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act4_backward_kernel", ([&] {
602
+ act4_backward_kernel<group_t, scalar_t>(
603
+ grad.data_ptr<scalar_t>(),
604
+ X.data_ptr<scalar_t>(),
605
+ p.data_ptr<scalar_t>(),
606
+ dX.data_ptr<scalar_t>(),
607
+ dp.data_ptr<scalar_t>(),
608
+ batch_size);
609
+ }));
610
+
611
+ return {dX, dp};
612
+ }
613
+
614
+
615
+ torch::Tensor as_matrix_forward_cpu(int group_id, torch::Tensor X) {
616
+ int batch_size = X.size(0);
617
+ torch::Tensor T4x4 = torch::zeros({X.size(0), 4, 4}, X.options());
618
+
619
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "as_matrix_forward_kernel", ([&] {
620
+ as_matrix_forward_kernel<group_t, scalar_t>(
621
+ X.data_ptr<scalar_t>(),
622
+ T4x4.data_ptr<scalar_t>(),
623
+ batch_size);
624
+ }));
625
+
626
+ return T4x4;
627
+ }
628
+
629
+
630
+ torch::Tensor orthogonal_projector_cpu(int group_id, torch::Tensor X) {
631
+ int batch_size = X.size(0);
632
+ torch::Tensor P;
633
+
634
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "orthogonal_projector_kernel", ([&] {
635
+ P = torch::zeros({X.size(0), group_t::N, group_t::N}, X.options());
636
+ orthogonal_projector_kernel<group_t, scalar_t>(X.data_ptr<scalar_t>(), P.data_ptr<scalar_t>(), batch_size);
637
+ }));
638
+
639
+ return P;
640
+ }
641
+
642
+
643
+
644
+ torch::Tensor jleft_forward_cpu(int group_id, torch::Tensor X, torch::Tensor a) {
645
+ int batch_size = X.size(0);
646
+ torch::Tensor b = torch::zeros(a.sizes(), a.options());
647
+
648
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "jleft_forward_kernel", ([&] {
649
+ jleft_forward_kernel<group_t, scalar_t>(
650
+ X.data_ptr<scalar_t>(),
651
+ a.data_ptr<scalar_t>(),
652
+ b.data_ptr<scalar_t>(),
653
+ batch_size);
654
+ }));
655
+
656
+ return b;
657
+ }
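Note (editor's illustration, not part of the commit): the CPU path above works on a flat batch. Each kernel receives raw pointers, wraps batch element i with Eigen::Map, and splits the loop across threads with at::parallel_for; the kernels only ever see batch_size = X.size(0), so the Python wrapper is expected to flatten arbitrary leading dimensions before the call and restore them afterwards. From Python that batching is invisible:

import torch
from lietorch import SE3

# arbitrary leading batch shape on CPU; the kernels only see a flat batch
a = 0.1 * torch.randn(2, 5, 6).double()
X = SE3.exp(a)
residual = (X * X.inv()).log()            # mirrors test_inv in run_tests.py
assert torch.allclose(residual, torch.zeros_like(residual), atol=1e-8)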
third-party/DPVO/dpvo/lietorch/src/lietorch_gpu.cu ADDED
@@ -0,0 +1,601 @@
1
+
2
+ #include "lietorch_gpu.h"
3
+ #include <Eigen/Dense>
4
+
5
+ #include "common.h"
6
+ #include "dispatch.h"
7
+
8
+ #include "so3.h"
9
+ #include "rxso3.h"
10
+ #include "se3.h"
11
+ #include "sim3.h"
12
+
13
+ #define GPU_1D_KERNEL_LOOP(i, n) \
14
+ for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i<n; i += blockDim.x * gridDim.x)
15
+
16
+ #define NUM_THREADS 256
17
+ #define NUM_BLOCKS(batch_size) ((batch_size + NUM_THREADS - 1) / NUM_THREADS)
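+ // one thread per batch element: NUM_BLOCKS rounds up, and the grid-stride loop above covers any leftover indices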
18
+
19
+
20
+ template <typename Group, typename scalar_t>
21
+ __global__ void exp_forward_kernel(const scalar_t* a_ptr, scalar_t* X_ptr, int num_threads) {
22
+ // exponential map forward kernel
23
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
24
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
25
+
26
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
27
+ Tangent a(a_ptr + i*Group::K);
28
+ Eigen::Map<Data>(X_ptr + i*Group::N) = Group::Exp(a).data();
29
+ }
30
+ }
31
+
32
+ template <typename Group, typename scalar_t>
33
+ __global__ void exp_backward_kernel(const scalar_t* grad, const scalar_t* a_ptr, scalar_t* da, int num_threads) {
34
+ // exponential map backward kernel
35
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
36
+ using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
37
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
38
+
39
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
40
+ Tangent a(a_ptr + i*Group::K);
41
+ Grad dX(grad + i*Group::N);
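+ // backward through Exp: multiply the incoming row-gradient by the group's left Jacobian at a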
42
+ Eigen::Map<Grad>(da + i*Group::K) = dX * Group::left_jacobian(a);
43
+ }
44
+ }
45
+
46
+ template <typename Group, typename scalar_t>
47
+ __global__ void log_forward_kernel(const scalar_t* X_ptr, scalar_t* a_ptr, int num_threads) {
48
+ // logarithm map forward kernel
49
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
50
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
51
+
52
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
53
+ Tangent a = Group(X_ptr + i*Group::N).Log();
54
+ Eigen::Map<Tangent>(a_ptr + i*Group::K) = a;
55
+ }
56
+ }
57
+
58
+ template <typename Group, typename scalar_t>
59
+ __global__ void log_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, scalar_t* dX, int num_threads) {
60
+ // logarithm map backward kernel
61
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
62
+ using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
63
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
64
+
65
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
66
+ Tangent a = Group(X_ptr + i*Group::N).Log();
67
+ Grad da(grad + i*Group::K);
68
+ Eigen::Map<Grad>(dX + i*Group::N) = da * Group::left_jacobian_inverse(a);
69
+ }
70
+ }
71
+
72
+ template <typename Group, typename scalar_t>
73
+ __global__ void inv_forward_kernel(const scalar_t* X_ptr, scalar_t* Y_ptr, int num_threads) {
74
+ // group inverse forward kernel
75
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
76
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
77
+
78
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
79
+ Group X(X_ptr + i*Group::N);
80
+ Eigen::Map<Data>(Y_ptr + i*Group::N) = X.inv().data();
81
+ }
82
+ }
83
+
84
+
85
+ template <typename Group, typename scalar_t>
86
+ __global__ void inv_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, scalar_t *dX, int num_threads) {
87
+ // group inverse backward kernel
88
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
89
+ using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
90
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
91
+
92
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
93
+ Group Y = Group(X_ptr + i*Group::N).inv();
94
+ Grad dY(grad + i*Group::N);
95
+ Eigen::Map<Grad>(dX + i*Group::N) = -dY * Y.Adj();
96
+ }
97
+ }
98
+
99
+
100
+ template <typename Group, typename scalar_t>
101
+ __global__ void mul_forward_kernel(const scalar_t* X_ptr, const scalar_t* Y_ptr, scalar_t* Z_ptr, int num_threads) {
102
+ // group multiplication forward kernel
103
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
104
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
105
+
106
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
107
+ Group Z = Group(X_ptr + i*Group::N) * Group(Y_ptr + i*Group::N);
108
+ Eigen::Map<Data>(Z_ptr + i*Group::N) = Z.data();
109
+ }
110
+ }
111
+
112
+ template <class Group, typename scalar_t>
113
+ __global__ void mul_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* Y_ptr, scalar_t* dX, scalar_t* dY, int num_threads) {
114
+ // group multiplication backward kernel
115
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
116
+ using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
117
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
118
+
119
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
120
+ Grad dZ(grad + i*Group::N);
121
+ Group X(X_ptr + i*Group::N);
122
+ Eigen::Map<Grad>(dX + i*Group::N) = dZ;
123
+ Eigen::Map<Grad>(dY + i*Group::N) = dZ * X.Adj();
124
+ }
125
+ }
126
+
127
+ template <typename Group, typename scalar_t>
128
+ __global__ void adj_forward_kernel(const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* b_ptr, int num_threads) {
129
+ // adjoint forward kernel
130
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
131
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
132
+
133
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
134
+ Group X(X_ptr + i*Group::N);
135
+ Tangent a(a_ptr + i*Group::K);
136
+ Eigen::Map<Tangent>(b_ptr + i*Group::K) = X.Adj(a);
137
+ }
138
+ }
139
+
140
+ template <typename Group, typename scalar_t>
141
+ __global__ void adj_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* dX, scalar_t* da, int num_threads) {
142
+ // adjoint backward kernel
143
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
144
+ using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
145
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
146
+
147
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
148
+ Group X(X_ptr + i*Group::N);
149
+ Grad db(grad + i*Group::K);
150
+
151
+ Tangent a(a_ptr + i*Group::K);
152
+ Tangent b = X.Adj() * a;
153
+
154
+ Eigen::Map<Grad>(da + i*Group::K) = db * X.Adj();
155
+ Eigen::Map<Grad>(dX + i*Group::N) = -db * Group::adj(b);
156
+ }
157
+ }
158
+
159
+
160
+ template <typename Group, typename scalar_t>
161
+ __global__ void adjT_forward_kernel(const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* b_ptr, int num_threads) {
162
+ // transposed adjoint forward kernel
163
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
164
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
165
+
166
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
167
+ Group X(X_ptr + i*Group::N);
168
+ Tangent a(a_ptr + i*Group::K);
169
+ Eigen::Map<Tangent>(b_ptr + i*Group::K) = X.AdjT(a);
170
+ }
171
+ }
172
+
173
+ template <typename Group, typename scalar_t>
174
+ __global__ void adjT_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* dX, scalar_t* da, int num_threads) {
175
+ // transposed adjoint backward kernel
176
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
177
+ using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
178
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
179
+
180
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
181
+ Group X(X_ptr + i*Group::N);
182
+ Tangent db(grad + i*Group::K);
183
+ Grad a(a_ptr + i*Group::K);
184
+
185
+ Eigen::Map<Tangent>(da + i*Group::K) = X.Adj(db);
186
+ Eigen::Map<Grad>(dX + i*Group::N) = -a * Group::adj(X.Adj(db));
187
+ }
188
+ }
189
+
190
+ template <typename Group, typename scalar_t>
191
+ __global__ void act_forward_kernel(const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* q_ptr, int num_threads) {
192
+ // action on point forward kernel
193
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
194
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
195
+ using Point = Eigen::Matrix<scalar_t,3,1>;
196
+
197
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
198
+ Group X(X_ptr + i*Group::N);
199
+ Point p(p_ptr + i*3);
200
+ Eigen::Map<Point>(q_ptr + i*3) = X * p;
201
+ }
202
+ }
203
+
204
+ template <typename Group, typename scalar_t>
205
+ __global__ void act_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* dX, scalar_t* dp, int num_threads) {
206
+ // action on point backward kernel
207
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
208
+ using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
209
+ using Point = Eigen::Matrix<scalar_t,3,1>;
210
+ using PointGrad = Eigen::Matrix<scalar_t,1,3>;
211
+ using Transformation = Eigen::Matrix<scalar_t,4,4>;
212
+
213
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
214
+ Group X(X_ptr + i*Group::N);
215
+ Point p(p_ptr + i*3);
216
+ PointGrad dq(grad + i*3);
217
+
218
+ Eigen::Map<PointGrad>(dp + i*3) = dq * X.Matrix4x4().block<3,3>(0,0);
219
+ Eigen::Map<Grad>(dX + i*Group::N) = dq * Group::act_jacobian(X*p);
220
+ }
221
+ }
222
+
223
+
224
+ template <typename Group, typename scalar_t>
225
+ __global__ void act4_forward_kernel(const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* q_ptr, int num_threads) {
226
+ // action on point forward kernel
227
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
228
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
229
+ using Point = Eigen::Matrix<scalar_t,4,1>;
230
+
231
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
232
+ Group X(X_ptr + i*Group::N);
233
+ Point p(p_ptr + i*4);
234
+ Eigen::Map<Point>(q_ptr + i*4) = X.act4(p);
235
+ }
236
+ }
237
+
238
+ template <typename Group, typename scalar_t>
239
+ __global__ void act4_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* dX, scalar_t* dp, int num_threads) {
240
+ // action on homogeneous point backward kernel
241
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
242
+ using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
243
+ using Point = Eigen::Matrix<scalar_t,4,1>;
244
+ using PointGrad = Eigen::Matrix<scalar_t,1,4>;
245
+ using Transformation = Eigen::Matrix<scalar_t,4,4>;
246
+
247
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
248
+ Group X(X_ptr + i*Group::N);
249
+ Point p(p_ptr + i*4);
250
+ PointGrad dq(grad + i*4);
251
+
252
+ Eigen::Map<PointGrad>(dp + i*4) = dq * X.Matrix4x4();
253
+ const Point q = X.act4(p);
254
+ Eigen::Map<Grad>(dX + i*Group::N) = dq * Group::act4_jacobian(q);
255
+ }
256
+ }
257
+
258
+ template <typename Group, typename scalar_t>
259
+ __global__ void as_matrix_forward_kernel(const scalar_t* X_ptr, scalar_t* T_ptr, int num_threads) {
260
+ // convert to 4x4 matrix representation
261
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
262
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
263
+ using Matrix4 = Eigen::Matrix<scalar_t,4,4,Eigen::RowMajor>;
264
+
265
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
266
+ Group X(X_ptr + i*Group::N);
267
+ Eigen::Map<Matrix4>(T_ptr + i*16) = X.Matrix4x4();
268
+ }
269
+ }
270
+
271
+ template <typename Group, typename scalar_t>
272
+ __global__ void orthogonal_projector_kernel(const scalar_t* X_ptr, scalar_t* P_ptr, int num_threads) {
273
+ // orthogonal projection matrix
274
+ using Proj = Eigen::Matrix<scalar_t,Group::N,Group::N,Eigen::RowMajor>;
275
+
276
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
277
+ Group X(X_ptr + i*Group::N);
278
+ Eigen::Map<Proj>(P_ptr + i*Group::N*Group::N) = X.orthogonal_projector();
279
+ }
280
+ }
281
+
282
+ template <typename Group, typename scalar_t>
283
+ __global__ void jleft_forward_kernel(const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* b_ptr, int num_threads) {
284
+ // left jacobian inverse action
285
+ using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
286
+ using Data = Eigen::Matrix<scalar_t,Group::N,1>;
287
+
288
+ GPU_1D_KERNEL_LOOP(i, num_threads) {
289
+ Group X(X_ptr + i*Group::N);
290
+ Tangent a(a_ptr + i*Group::K);
291
+ Tangent b = Group::left_jacobian_inverse(X.Log()) * a;
292
+ Eigen::Map<Tangent>(b_ptr + i*Group::K) = b;
293
+ }
294
+ }
295
+
296
+ // unary operations
297
+
298
+ torch::Tensor exp_forward_gpu(int group_id, torch::Tensor a) {
299
+ int batch_size = a.size(0);
300
+ torch::Tensor X;
301
+
302
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, a.type(), "exp_forward_kernel", ([&] {
303
+ X = torch::zeros({batch_size, group_t::N}, a.options());
304
+ exp_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
305
+ a.data_ptr<scalar_t>(),
306
+ X.data_ptr<scalar_t>(),
307
+ batch_size);
308
+ }));
309
+
310
+ return X;
311
+ }
312
+
313
+ std::vector<torch::Tensor> exp_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor a) {
314
+ int batch_size = a.size(0);
315
+ torch::Tensor da = torch::zeros(a.sizes(), grad.options());
316
+
317
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, a.type(), "exp_backward_kernel", ([&] {
318
+ exp_backward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
319
+ grad.data_ptr<scalar_t>(),
320
+ a.data_ptr<scalar_t>(),
321
+ da.data_ptr<scalar_t>(),
322
+ batch_size);
323
+ }));
324
+
325
+ return {da};
326
+ }
327
+
328
+ torch::Tensor log_forward_gpu(int group_id, torch::Tensor X) {
329
+ int batch_size = X.size(0);
330
+ torch::Tensor a;
331
+
332
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "log_forward_kernel", ([&] {
333
+ a = torch::zeros({batch_size, group_t::K}, X.options());
334
+ log_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
335
+ X.data_ptr<scalar_t>(),
336
+ a.data_ptr<scalar_t>(),
337
+ batch_size);
338
+ }));
339
+
340
+ return a;
341
+ }
342
+
343
+ std::vector<torch::Tensor> log_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X) {
344
+ int batch_size = X.size(0);
345
+ torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
346
+
347
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "log_backward_kernel", ([&] {
348
+ log_backward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
349
+ grad.data_ptr<scalar_t>(),
350
+ X.data_ptr<scalar_t>(),
351
+ dX.data_ptr<scalar_t>(),
352
+ batch_size);
353
+ }));
354
+
355
+ return {dX};
356
+ }
357
+
358
+ torch::Tensor inv_forward_gpu(int group_id, torch::Tensor X) {
359
+ int batch_size = X.size(0);
360
+ torch::Tensor Y = torch::zeros_like(X);
361
+
362
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "inv_forward_kernel", ([&] {
363
+ inv_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
364
+ X.data_ptr<scalar_t>(),
365
+ Y.data_ptr<scalar_t>(),
366
+ batch_size);
367
+ }));
368
+
369
+ return Y;
370
+ }
371
+
372
+ std::vector<torch::Tensor> inv_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X) {
373
+ int batch_size = X.size(0);
374
+ torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
375
+
376
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "inv_backward_kernel", ([&] {
377
+ inv_backward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
378
+ grad.data_ptr<scalar_t>(),
379
+ X.data_ptr<scalar_t>(),
380
+ dX.data_ptr<scalar_t>(),
381
+ batch_size);
382
+ }));
383
+
384
+ return {dX};
385
+ }
386
+
387
+ // binary operations
388
+ torch::Tensor mul_forward_gpu(int group_id, torch::Tensor X, torch::Tensor Y) {
389
+ int batch_size = X.size(0);
390
+ torch::Tensor Z = torch::zeros_like(X);
391
+
392
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "mul_forward_kernel", ([&] {
393
+ mul_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
394
+ X.data_ptr<scalar_t>(),
395
+ Y.data_ptr<scalar_t>(),
396
+ Z.data_ptr<scalar_t>(),
397
+ batch_size);
398
+ }));
399
+
400
+ return Z;
401
+ }
402
+
403
+ std::vector<torch::Tensor> mul_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor Y) {
404
+ int batch_size = X.size(0);
405
+ torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
406
+ torch::Tensor dY = torch::zeros(Y.sizes(), grad.options());
407
+
408
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "mul_backward_kernel", ([&] {
409
+ mul_backward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
410
+ grad.data_ptr<scalar_t>(),
411
+ X.data_ptr<scalar_t>(),
412
+ Y.data_ptr<scalar_t>(),
413
+ dX.data_ptr<scalar_t>(),
414
+ dY.data_ptr<scalar_t>(),
415
+ batch_size);
416
+ }));
417
+
418
+ return {dX, dY};
419
+ }
420
+
421
+ torch::Tensor adj_forward_gpu(int group_id, torch::Tensor X, torch::Tensor a) {
422
+ int batch_size = X.size(0);
423
+ torch::Tensor b = torch::zeros(a.sizes(), a.options());
424
+
425
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adj_forward_kernel", ([&] {
426
+ adj_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
427
+ X.data_ptr<scalar_t>(),
428
+ a.data_ptr<scalar_t>(),
429
+ b.data_ptr<scalar_t>(),
430
+ batch_size);
431
+ }));
432
+
433
+ return b;
434
+ }
435
+
436
+ std::vector<torch::Tensor> adj_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor a) {
437
+ int batch_size = X.size(0);
438
+ torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
439
+ torch::Tensor da = torch::zeros(a.sizes(), grad.options());
440
+
441
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adj_backward_kernel", ([&] {
442
+ adj_backward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
443
+ grad.data_ptr<scalar_t>(),
444
+ X.data_ptr<scalar_t>(),
445
+ a.data_ptr<scalar_t>(),
446
+ dX.data_ptr<scalar_t>(),
447
+ da.data_ptr<scalar_t>(),
448
+ batch_size);
449
+ }));
450
+
451
+ return {dX, da};
452
+ }
453
+
454
+
455
+ torch::Tensor adjT_forward_gpu(int group_id, torch::Tensor X, torch::Tensor a) {
456
+ int batch_size = X.size(0);
457
+ torch::Tensor b = torch::zeros(a.sizes(), a.options());
458
+
459
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adjT_forward_kernel", ([&] {
460
+ adjT_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
461
+ X.data_ptr<scalar_t>(),
462
+ a.data_ptr<scalar_t>(),
463
+ b.data_ptr<scalar_t>(),
464
+ batch_size);
465
+ }));
466
+
467
+ return b;
468
+ }
469
+
470
+ std::vector<torch::Tensor> adjT_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor a) {
471
+ int batch_size = X.size(0);
472
+ torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
473
+ torch::Tensor da = torch::zeros(a.sizes(), grad.options());
474
+
475
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adjT_backward_kernel", ([&] {
476
+ adjT_backward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
477
+ grad.data_ptr<scalar_t>(),
478
+ X.data_ptr<scalar_t>(),
479
+ a.data_ptr<scalar_t>(),
480
+ dX.data_ptr<scalar_t>(),
481
+ da.data_ptr<scalar_t>(),
482
+ batch_size);
483
+ }));
484
+
485
+ return {dX, da};
486
+ }
487
+
488
+
489
+
490
+ torch::Tensor act_forward_gpu(int group_id, torch::Tensor X, torch::Tensor p) {
491
+ int batch_size = X.size(0);
492
+ torch::Tensor q = torch::zeros(p.sizes(), p.options());
493
+
494
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act_forward_kernel", ([&] {
495
+ act_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
496
+ X.data_ptr<scalar_t>(),
497
+ p.data_ptr<scalar_t>(),
498
+ q.data_ptr<scalar_t>(),
499
+ batch_size);
500
+ }));
501
+
502
+ return q;
503
+ }
504
+
505
+ std::vector<torch::Tensor> act_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor p) {
506
+ int batch_size = X.size(0);
507
+ torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
508
+ torch::Tensor dp = torch::zeros(p.sizes(), grad.options());
509
+
510
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act_backward_kernel", ([&] {
511
+ act_backward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
512
+ grad.data_ptr<scalar_t>(),
513
+ X.data_ptr<scalar_t>(),
514
+ p.data_ptr<scalar_t>(),
515
+ dX.data_ptr<scalar_t>(),
516
+ dp.data_ptr<scalar_t>(),
517
+ batch_size);
518
+ }));
519
+
520
+ return {dX, dp};
521
+ }
522
+
523
+ torch::Tensor act4_forward_gpu(int group_id, torch::Tensor X, torch::Tensor p) {
524
+ int batch_size = X.size(0);
525
+ torch::Tensor q = torch::zeros(p.sizes(), p.options());
526
+
527
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act4_forward_kernel", ([&] {
528
+ act4_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
529
+ X.data_ptr<scalar_t>(),
530
+ p.data_ptr<scalar_t>(),
531
+ q.data_ptr<scalar_t>(),
532
+ batch_size);
533
+ }));
534
+
535
+ return q;
536
+ }
537
+
538
+ std::vector<torch::Tensor> act4_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor p) {
539
+ int batch_size = X.size(0);
540
+ torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
541
+ torch::Tensor dp = torch::zeros(p.sizes(), grad.options());
542
+
543
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act4_backward_kernel", ([&] {
544
+ act4_backward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
545
+ grad.data_ptr<scalar_t>(),
546
+ X.data_ptr<scalar_t>(),
547
+ p.data_ptr<scalar_t>(),
548
+ dX.data_ptr<scalar_t>(),
549
+ dp.data_ptr<scalar_t>(),
550
+ batch_size);
551
+ }));
552
+
553
+ return {dX, dp};
554
+ }
555
+
556
+
557
+ torch::Tensor as_matrix_forward_gpu(int group_id, torch::Tensor X) {
558
+ int batch_size = X.size(0);
559
+ torch::Tensor T4x4 = torch::zeros({X.size(0), 4, 4}, X.options());
560
+
561
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "as_matrix_forward_kernel", ([&] {
562
+ as_matrix_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
563
+ X.data_ptr<scalar_t>(),
564
+ T4x4.data_ptr<scalar_t>(),
565
+ batch_size);
566
+ }));
567
+
568
+ return T4x4;
569
+ }
570
+
571
+
572
+ torch::Tensor orthogonal_projector_gpu(int group_id, torch::Tensor X) {
573
+ int batch_size = X.size(0);
574
+ torch::Tensor P;
575
+
576
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "orthogonal_projector_kernel", ([&] {
577
+ P = torch::zeros({X.size(0), group_t::N, group_t::N}, X.options());
578
+ orthogonal_projector_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
579
+ X.data_ptr<scalar_t>(),
580
+ P.data_ptr<scalar_t>(),
581
+ batch_size);
582
+ }));
583
+
584
+ return P;
585
+ }
586
+
587
+
588
+ torch::Tensor jleft_forward_gpu(int group_id, torch::Tensor X, torch::Tensor a) {
589
+ int batch_size = X.size(0);
590
+ torch::Tensor b = torch::zeros(a.sizes(), a.options());
591
+
592
+ DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "jleft_forward_kernel", ([&] {
593
+ jleft_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
594
+ X.data_ptr<scalar_t>(),
595
+ a.data_ptr<scalar_t>(),
596
+ b.data_ptr<scalar_t>(),
597
+ batch_size);
598
+ }));
599
+
600
+ return b;
601
+ }
third-party/DPVO/dpvo/logger.py ADDED
@@ -0,0 +1,58 @@
1
+
2
+ import torch
3
+ from torch.utils.tensorboard import SummaryWriter
4
+
5
+
6
+ SUM_FREQ = 100
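+ # number of steps over which running losses are averaged before being written to TensorBoard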
7
+
8
+ class Logger:
9
+ def __init__(self, name, scheduler):
10
+ self.total_steps = 0
11
+ self.running_loss = {}
12
+ self.writer = None
13
+ self.name = name
14
+ self.scheduler = scheduler
15
+
16
+ def _print_training_status(self):
17
+ if self.writer is None:
18
+ self.writer = SummaryWriter("runs/{}".format(self.name))
19
+ print([k for k in self.running_loss])
20
+
21
+ lr = self.scheduler.get_lr().pop()
22
+ metrics_data = [self.running_loss[k]/SUM_FREQ for k in self.running_loss.keys()]
23
+ training_str = "[{:6d}, {:10.7f}] ".format(self.total_steps+1, lr)
24
+ metrics_str = ("{:10.4f}, "*len(metrics_data)).format(*metrics_data)
25
+
26
+ # print the training status
27
+ print(training_str + metrics_str)
28
+
29
+ for key in self.running_loss:
30
+ val = self.running_loss[key] / SUM_FREQ
31
+ self.writer.add_scalar(key, val, self.total_steps)
32
+ self.running_loss[key] = 0.0
33
+
34
+ def push(self, metrics):
35
+
36
+ for key in metrics:
37
+ if key not in self.running_loss:
38
+ self.running_loss[key] = 0.0
39
+
40
+ self.running_loss[key] += metrics[key]
41
+
42
+ if self.total_steps % SUM_FREQ == SUM_FREQ-1:
43
+ self._print_training_status()
44
+ self.running_loss = {}
45
+
46
+ self.total_steps += 1
47
+
48
+ def write_dict(self, results):
49
+ if self.writer is None:
50
+ self.writer = SummaryWriter("runs/{}".format(self.name))
51
+ print([k for k in self.running_loss])
52
+
53
+ for key in results:
54
+ self.writer.add_scalar(key, results[key], self.total_steps)
55
+
56
+ def close(self):
57
+ self.writer.close()
58
+
third-party/DPVO/dpvo/loop_closure/long_term.py ADDED
@@ -0,0 +1,267 @@
1
+ import os
2
+
3
+ import kornia as K
4
+ import kornia.feature as KF
5
+ import numpy as np
6
+ import pypose as pp
7
+ import torch
8
+ import torch.multiprocessing as mp
9
+ import torch.nn.functional as F
10
+ from einops import asnumpy, rearrange, repeat
11
+ from torch_scatter import scatter_max
12
+
13
+ from .. import fastba
14
+ from .. import projective_ops as pops
15
+ from ..lietorch import SE3
16
+ from .optim_utils import SE3_to_Sim3, make_pypose_Sim3, ransac_umeyama, run_DPVO_PGO
17
+ from .retrieval import ImageCache, RetrievalDBOW
18
+
19
+
20
+ class LongTermLoopClosure:
21
+
22
+ def __init__(self, cfg, patchgraph):
23
+ self.cfg = cfg
24
+
25
+ # Data structures to manage retrieval
26
+ self.retrieval = RetrievalDBOW()
27
+ self.imcache = ImageCache()
28
+
29
+ # Process to run PGO in parallel
30
+ self.lc_pool = mp.Pool(processes=1)
31
+ self.lc_process = self.lc_pool.apply_async(os.getpid)
32
+ self.manager = mp.Manager()
33
+ self.result_queue = self.manager.Queue()
34
+ self.lc_in_progress = False
35
+
36
+ # Patch graph + loop edges
37
+ self.pg = patchgraph
38
+ self.loop_ii = torch.zeros(0, dtype=torch.long)
39
+ self.loop_jj = torch.zeros(0, dtype=torch.long)
40
+
41
+ self.lc_count = 0
42
+
43
+ # warmup the jit compiler
44
+ ransac_umeyama(np.random.randn(3,3), np.random.randn(3,3), iterations=200, threshold=0.01)
45
+
46
+ self.detector = KF.DISK.from_pretrained("depth").to("cuda").eval()
47
+ self.matcher = KF.LightGlue("disk").to("cuda").eval()
48
+
49
+ def detect_keypoints(self, images, num_features=2048):
50
+ """ Pretty self explanitory! Alas, we can only use disk w/ lightglue. ORB is brittle """
51
+ _, _, h, w = images.shape
52
+ wh = torch.tensor([w, h]).view(1, 2).float().cuda()
53
+ features = self.detector(images, num_features, pad_if_not_divisible=True, window_size=15, score_threshold=40.0)
54
+ return [{
55
+ "keypoints": f.keypoints[None],
56
+ "descriptors": f.descriptors[None],
57
+ "image_size": wh
58
+ } for f in features]
59
+
60
+
61
+ def __call__(self, img, n):
62
+ img_np = K.tensor_to_image(img)
63
+ self.retrieval(img_np, n)
64
+ self.imcache(img_np, n)
65
+
66
+ def keyframe(self, k):
67
+ self.retrieval.keyframe(k)
68
+ self.imcache.keyframe(k)
69
+
70
+ def estimate_3d_keypoints(self, i):
71
+ """ Detect, match and triangulate 3D points """
72
+
73
+ """ Load the triplet of frames """
74
+ image_orig = self.imcache.load_frames([i-1,i,i+1], self.pg.intrinsics.device)
75
+ image = image_orig.float() / 255
76
+ fl = self.detect_keypoints(image)
77
+
78
+ """ Form keypoint trajectories """
79
+ trajectories = torch.full((2048, 3), -1, device='cuda', dtype=torch.long)
80
+ trajectories[:,1] = torch.arange(2048)
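+ # columns hold the keypoint index in frames i-1, i, i+1; -1 marks keypoints that are not matched yet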
81
+
82
+ out = self.matcher({"image0": fl[0], "image1": fl[1]})
83
+ i0, i1 = out["matches"][0].mT
84
+ trajectories[i1, 0] = i0
85
+
86
+ out = self.matcher({"image0": fl[2], "image1": fl[1]})
87
+ i2, i1 = out["matches"][0].mT
88
+ trajectories[i1, 2] = i2
89
+
90
+ trajectories = trajectories[torch.randperm(2048)]
91
+ trajectories = trajectories[trajectories.min(dim=1).values >= 0]
92
+
93
+ a,b,c = trajectories.mT
94
+ n, _ = trajectories.shape
95
+ kps0 = fl[0]['keypoints'][:,a]
96
+ kps1 = fl[1]['keypoints'][:,b]
97
+ kps2 = fl[2]['keypoints'][:,c]
98
+
99
+ desc1 = fl[1]['descriptors'][:,b]
100
+ image_size = fl[1]["image_size"]
101
+
102
+ kk = torch.arange(n).cuda().repeat(2)
103
+ ii = torch.ones(2*n, device='cuda', dtype=torch.long)
104
+ jj = torch.zeros(2*n, device='cuda', dtype=torch.long)
105
+ jj[n:] = 2
106
+
107
+
108
+ """ Construct "mini" patch graph. """
109
+ true_disp = self.pg.patches_[i,:,2,1,1].median()
110
+ patches = torch.cat((kps1, torch.ones(1, n, 1).cuda() * true_disp), dim=-1)
111
+ patches = repeat(patches, '1 n uvd -> 1 n uvd 3 3', uvd=3)
112
+ target = rearrange(torch.stack((kps0, kps2)), 'ot 1 n uv -> 1 (ot n) uv', uv=2, n=n, ot=2)
113
+ weight = torch.ones_like(target)
114
+
115
+ poses = self.pg.poses[:,i-1:i+2].clone()
116
+ intrinsics = self.pg.intrinsics[:,i-1:i+2].clone() * 4
117
+
118
+ coords = pops.transform(SE3(poses), patches, intrinsics, ii, jj, kk)
119
+ coords = coords[:,:,1,1]
120
+ residual = (coords - target).norm(dim=-1).squeeze(0)
121
+
122
+ """ structure-only bundle adjustment """
123
+ lmbda = torch.as_tensor([1e-3], device="cuda")
124
+ fastba.BA(poses, patches, intrinsics,
125
+ target, weight, lmbda, ii, jj, kk, 3, 3, M=-1, iterations=6, eff_impl=False)
126
+
127
+ """ Only keep points with small residuals """
128
+ coords = pops.transform(SE3(poses), patches, intrinsics, ii, jj, kk)
129
+ coords = coords[:,:,1,1]
130
+ residual = (coords - target).norm(dim=-1).squeeze(0)
131
+ assert residual.numel() == 2*n
132
+ mask = scatter_max(residual, kk)[0] < 2
133
+
134
+ """ Un-project keypoints """
135
+ points = pops.iproj(patches, intrinsics[:,torch.ones(n, device='cuda', dtype=torch.long)])
136
+ points = (points[...,1,1,:3] / points[...,1,1,3:])
137
+
138
+ return points[:,mask].squeeze(0), {"keypoints": kps1[:,mask], "descriptors": desc1[:,mask], "image_size": image_size}
139
+
140
+ def attempt_loop_closure(self, n):
141
+ if self.lc_in_progress:
142
+ return
143
+
144
+ """ Check if a loop was detected """
145
+ cands = self.retrieval.detect_loop(thresh=self.cfg.LOOP_RETR_THRESH, num_repeat=self.cfg.LOOP_CLOSE_WINDOW_SIZE)
146
+ if cands is not None:
147
+ i, j = cands
148
+
149
+ """ A loop was detected. Try to close it """
150
+ lc_result = self.close_loop(i, j, n)
151
+ self.lc_count += int(lc_result)
152
+
153
+ """ Avoid multiple back-to-back detections """
154
+ if lc_result:
155
+ self.retrieval.confirm_loop(i, j)
156
+ self.retrieval.found.clear()
157
+
158
+ """ "Flush" the queue of frames into the loop-closure pipeline """
159
+ self.retrieval.save_up_to(n - self.cfg.REMOVAL_WINDOW - 2)
160
+ self.imcache.save_up_to(n - self.cfg.REMOVAL_WINDOW - 1)
161
+
162
+ def terminate(self, n):
163
+ self.retrieval.save_up_to(n-1)
164
+ self.imcache.save_up_to(n-1)
165
+ self.attempt_loop_closure(n)
166
+ if self.lc_in_progress:
167
+ self.lc_callback(skip_if_empty=False)
168
+ self.lc_process.get()
169
+ self.imcache.close()
170
+ self.lc_pool.close()
171
+ self.retrieval.close()
172
+ print(f"LC COUNT: {self.lc_count}")
173
+
174
+
175
+ def _rescale_deltas(self, s):
176
+ """ Rescale the poses of removed frames by their predicted scales """
177
+
178
+ tstamp_2_rescale = {}
179
+ for i in range(self.pg.n):
180
+ tstamp_2_rescale[self.pg.tstamps_[i]] = s[i]
181
+
182
+ for t, (t0, dP) in self.pg.delta.items():
183
+ t_src = t
184
+ while t_src in self.pg.delta:
185
+ t_src, _ = self.pg.delta[t_src]
186
+ s1 = tstamp_2_rescale[t_src]
187
+ self.pg.delta[t] = (t0, dP.scale(s1))
188
+
189
+ def lc_callback(self, skip_if_empty=True):
190
+ """ Check if the PGO finished running """
191
+ if skip_if_empty and self.result_queue.empty():
192
+ return
193
+ self.lc_in_progress = False
194
+ final_est = self.result_queue.get()
195
+ safe_i, _ = final_est.shape
196
+ res, s = final_est.tensor().cuda().split([7,1], dim=1)
197
+ s1 = torch.ones(self.pg.n, device=s.device)
198
+ s1[:safe_i] = s.squeeze()
199
+
200
+ self.pg.poses_[:safe_i] = SE3(res).inv().data
201
+ self.pg.patches_[:safe_i,:,2] /= s.view(safe_i, 1, 1, 1)
202
+ self._rescale_deltas(s1)
203
+ self.pg.normalize()
204
+
205
+ def close_loop(self, i, j, n):
206
+ """ This function tries to actually execute the loop closure """
207
+ MIN_NUM_INLIERS = 30 # Minimum number of inlier matches
208
+ # print("Found a match!", i, j)
209
+
210
+ """ Estimate 3d keypoints w/ features"""
211
+ i_pts, i_feat = self.estimate_3d_keypoints(i)
212
+ j_pts, j_feat = self.estimate_3d_keypoints(j)
213
+ _, _, iz = i_pts.mT
214
+ _, _, jz = j_pts.mT
215
+ th = 20 # a depth threshold. Far-away points aren't helpful
216
+ i_pts = i_pts[iz < th]
217
+ j_pts = j_pts[jz < th]
218
+ for key in ['keypoints', 'descriptors']:
219
+ i_feat[key] = i_feat[key][:,iz < th]
220
+ j_feat[key] = j_feat[key][:,jz < th]
221
+
222
+ # Early exit
223
+ if i_pts.numel() < MIN_NUM_INLIERS:
224
+ # print(f"Too few inliers (A): {i_pts.numel()=}")
225
+ return False
226
+
227
+ """ Match between the two point clouds """
228
+ out = self.matcher({"image0": i_feat, "image1": j_feat})
229
+ i_ind, j_ind = out["matches"][0].mT
230
+ i_pts = i_pts[i_ind]
231
+ j_pts = j_pts[j_ind]
232
+ assert i_pts.shape == j_pts.shape, (i_pts.shape, j_pts.shape)
233
+ i_pts, j_pts = asnumpy(i_pts.double()), asnumpy(j_pts.double())
234
+
235
+ # Early exit
236
+ if i_pts.size < MIN_NUM_INLIERS:
237
+ # print(f"Too few inliers (B): {i_pts.size=}")
238
+ return False
239
+
240
+ """ Estimate Sim(3) transformation """
241
+ r, t, s, num_inliers = ransac_umeyama(i_pts, j_pts, iterations=400, threshold=0.1) # threshold shouldn't be too low
242
+
243
+ # Exit if the number of inlier matches is too small
244
+ if num_inliers < MIN_NUM_INLIERS:
245
+ # print(f"Too few inliers (C): {num_inliers=}")
246
+ return False
247
+
248
+ """ Run Pose-Graph Optimization (PGO) """
249
+ far_rel_pose = make_pypose_Sim3(r, t, s)[None]
250
+ Gi = pp.SE3(self.pg.poses[:,self.loop_ii])
251
+ Gj = pp.SE3(self.pg.poses[:,self.loop_jj])
252
+ Gij = Gj * Gi.Inv()
253
+ prev_sim3 = SE3_to_Sim3(Gij).data[0].cpu()
254
+ loop_poses = pp.Sim3(torch.cat((prev_sim3, far_rel_pose)))
255
+ loop_ii = torch.cat((self.loop_ii, torch.tensor([i])))
256
+ loop_jj = torch.cat((self.loop_jj, torch.tensor([j])))
257
+
258
+ pred_poses = pp.SE3(self.pg.poses_[:n]).Inv().cpu()
259
+
260
+ self.loop_ii = loop_ii
261
+ self.loop_jj = loop_jj
262
+
263
+ torch.set_num_threads(1)
264
+
265
+ self.lc_in_progress = True
266
+ self.lc_process = self.lc_pool.apply_async(run_DPVO_PGO, (pred_poses.data, loop_poses.data, loop_ii, loop_jj, self.result_queue))
267
+ return True
third-party/DPVO/dpvo/loop_closure/optim_utils.py ADDED
@@ -0,0 +1,243 @@
1
+ import cuda_ba
2
+ import numba as nb
3
+ import numpy as np
4
+ import pypose as pp
5
+ import torch
6
+ from einops import parse_shape, rearrange
7
+ from scipy.spatial.transform import Rotation as R
8
+
9
+
10
+ def make_pypose_Sim3(rot, t, s):
11
+ q = R.from_matrix(rot).as_quat()
12
+ data = np.concatenate([t, q, np.array(s).reshape((1,))])
13
+ return pp.Sim3(data)
14
+
15
+ def SE3_to_Sim3(x: pp.SE3):
16
+ out = torch.cat((x.data, torch.ones_like(x.data[...,:1])), dim=-1)
17
+ return pp.Sim3(out)
18
+
19
+ @nb.njit(cache=True)
20
+ def _format(es):
21
+ return np.asarray(es, dtype=np.int64).reshape((-1, 2))[1:]
22
+
23
+ @nb.njit(cache=True)
24
+ def reduce_edges(flow_mag, ii, jj, max_num_edges, nms):
25
+ es = [(-1, -1)]
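+ # (-1, -1) is a dummy entry so numba can infer the list type; _format() drops it via [1:]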
26
+
27
+ if ii.size == 0:
28
+ return _format(es)
29
+
30
+ Ni, Nj = (ii.max()+1), (jj.max()+1)
31
+ ignore_lookup = np.zeros((Ni, Nj), dtype=nb.bool_)
32
+
33
+ idxs = np.argsort(flow_mag)
34
+ for idx in idxs: # edge index
35
+
36
+ if len(es) > max_num_edges:
37
+ break
38
+
39
+ i = ii[idx]
40
+ j = jj[idx]
41
+ mag = flow_mag[idx]
42
+
43
+ if ((j - i) < 30):
44
+ continue
45
+
46
+ if mag >= 1000: # i.e., inf
47
+ continue
48
+
49
+ if ignore_lookup[i, j]:
50
+ continue
51
+
52
+ es.append((i, j))
53
+
54
+ for di in range(-nms, nms+1):
55
+ i1 = i + di
56
+
57
+ if 0 <= i1 < Ni:
58
+ ignore_lookup[i1, j] = True
59
+
60
+ return _format(es)
61
+
62
+
63
+
64
+ @nb.njit(cache=True)
65
+ def umeyama_alignment(x: np.ndarray, y: np.ndarray):
66
+ """
67
+ The following function was copied from:
68
+ https://github.com/MichaelGrupp/evo/blob/3067541b350528fe46375423e5bc3a7c42c06c63/evo/core/geometry.py#L35
69
+
70
+ Computes the least squares solution parameters of an Sim(m) matrix
71
+ that minimizes the distance between a set of registered points.
72
+ Umeyama, Shinji: Least-squares estimation of transformation parameters
73
+ between two point patterns. IEEE PAMI, 1991
74
+ :param x: mxn matrix of points, m = dimension, n = nr. of data points
75
+ :param y: mxn matrix of points, m = dimension, n = nr. of data points
76
+ :param with_scale: set to True to align also the scale (default: 1.0 scale)
77
+ :return: r, t, c - rotation matrix, translation vector and scale factor
78
+ """
79
+
80
+ # m = dimension, n = nr. of data points
81
+ m, n = x.shape
82
+
83
+ # means, eq. 34 and 35
84
+ mean_x = x.sum(axis=1) / n
85
+ mean_y = y.sum(axis=1) / n
86
+
87
+ # variance, eq. 36
88
+ # "transpose" for column subtraction
89
+ sigma_x = 1.0 / n * (np.linalg.norm(x - mean_x[:, np.newaxis])**2)
90
+
91
+ # covariance matrix, eq. 38
92
+ outer_sum = np.zeros((m, m))
93
+ for i in range(n):
94
+ outer_sum += np.outer((y[:, i] - mean_y), (x[:, i] - mean_x))
95
+ cov_xy = np.multiply(1.0 / n, outer_sum)
96
+
97
+ # SVD (text betw. eq. 38 and 39)
98
+ u, d, v = np.linalg.svd(cov_xy)
99
+ if np.count_nonzero(d > np.finfo(d.dtype).eps) < m - 1:
100
+ return None, None, None # Degenerate covariance rank, Umeyama alignment is not possible
101
+
102
+ # S matrix, eq. 43
103
+ s = np.eye(m)
104
+ if np.linalg.det(u) * np.linalg.det(v) < 0.0:
105
+ # Ensure a RHS coordinate system (Kabsch algorithm).
106
+ s[m - 1, m - 1] = -1
107
+
108
+ # rotation, eq. 40
109
+ r = u.dot(s).dot(v)
110
+
111
+ # scale & translation, eq. 42 and 41
112
+ c = 1 / sigma_x * np.trace(np.diag(d).dot(s))
113
+ t = mean_y - np.multiply(c, r.dot(mean_x))
114
+
115
+ return r, t, c
116
+
117
+ @nb.njit(cache=True)
118
+ def ransac_umeyama(src_points, dst_points, iterations=1, threshold=0.1):
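+ # RANSAC wrapper around umeyama_alignment: fit Sim(3) to random 3-point samples, score by inlier count, then refit on all inliers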
119
+ best_inliers = 0
120
+ best_R = None
121
+ best_t = None
122
+ best_s = None
123
+ for _ in range(iterations):
124
+ # Randomly select three points
125
+ indices = np.random.choice(src_points.shape[0], 3, replace=False)
126
+ src_sample = src_points[indices]
127
+ dst_sample = dst_points[indices]
128
+
129
+ # Estimate transformation
130
+ R, t, s = umeyama_alignment(src_sample.T, dst_sample.T)
131
+ if t is None:
132
+ continue
133
+
134
+ # Apply transformation
135
+ transformed = (src_points @ (R * s).T) + t
136
+
137
+ # Count inliers (not ideal because depends on scene scale)
138
+ distances = np.sum((transformed - dst_points)**2, axis=1)**0.5
139
+ inlier_mask = distances < threshold
140
+ inliers = np.sum(inlier_mask)
141
+
142
+ # Update best transformation
143
+ if inliers > best_inliers:
144
+ best_inliers = inliers
145
+ best_R, best_t, best_s = umeyama_alignment(src_points[inlier_mask].T, dst_points[inlier_mask].T)
146
+
147
+ if inliers > 100:
148
+ break
149
+
150
+ return best_R, best_t, best_s, best_inliers
151
+
152
+ def batch_jacobian(func, x):
153
+ def _func_sum(*x):
154
+ return func(*x).sum(dim=0)
155
+ _, b, c = torch.autograd.functional.jacobian(_func_sum, x, vectorize=True)
156
+ return rearrange(torch.stack((b,c)), 'N O B I -> N B O I', N=2)
157
+
158
+ def _residual(C, Gi, Gj):
159
+ assert parse_shape(C, 'N _') == parse_shape(Gi, 'N _') == parse_shape(Gj, 'N _')
160
+ out = C @ pp.Exp(Gi) @ pp.Exp(Gj).Inv()
161
+ return out.Log().tensor()
162
+
163
+ def residual(Ginv, input_poses, dSloop, ii, jj, jacobian=False):
164
+
165
+ # prep
166
+ device = Ginv.device
167
+ assert parse_shape(input_poses, '_ d') == dict(d=7)
168
+ pred_inv_poses = SE3_to_Sim3(input_poses).Inv()
169
+
170
+ # free variables
171
+ n, _ = pred_inv_poses.shape
172
+ kk = torch.arange(1, n, device=device)
173
+ ll = kk-1
174
+
175
+ # constants
176
+ Ti = pred_inv_poses[kk]
177
+ Tj = pred_inv_poses[ll]
178
+ dSij = Tj @ Ti.Inv()
179
+
180
+ constants = torch.cat((dSij, dSloop), dim=0)
181
+ iii = torch.cat((kk, ii))
182
+ jjj = torch.cat((ll, jj))
183
+ resid = _residual(constants, Ginv[iii], Ginv[jjj])
184
+
185
+ if not jacobian:
186
+ return resid
187
+
188
+ J_Ginv_i, J_Ginv_j = batch_jacobian(_residual, (constants, Ginv[iii], Ginv[jjj]))
189
+ return resid, (J_Ginv_i, J_Ginv_j, iii, jjj)
190
+ # print(f"{J_Ginv_i.shape=} {J_Ginv_j.shape=} {resid.shape=} {iii.shape=} {jjj.shape=}")
191
+
192
+ r = iii.numel()
193
+ assert parse_shape(J_Ginv_i, 'r do di') == parse_shape(J_Ginv_j, 'r do di') == dict(r=r, do=7, di=7)
194
+ J = torch.zeros(r, n, 7, 7, device=device)
195
+ rr = torch.arange(r, device=device)
196
+ J[rr, iii] = J_Ginv_i
197
+ J[rr, jjj] = J_Ginv_j
198
+ J = rearrange(J, 'r n do di -> r do n di')
199
+
200
+ return resid, J, (J_Ginv_i, J_Ginv_j, iii, jjj)
201
+
202
+ def run_DPVO_PGO(pred_poses, loop_poses, loop_ii, loop_jj, queue):
203
+ final_est = perform_updates(pred_poses, loop_poses, loop_ii, loop_jj, iters=30)
204
+
205
+ safe_i = loop_ii.max().item() + 1
206
+ aa = SE3_to_Sim3(pred_poses.cpu())
207
+ final_est = (aa[[safe_i]] * final_est[[safe_i]].Inv()) * final_est
208
+ output = final_est[:safe_i]
209
+ queue.put(output)
210
+
211
+ def perform_updates(input_poses, dSloop, ii_loop, jj_loop, iters, ep=0.0, lmbda=1e-6, fix_opt_window=False):
212
+ """ Run the Levenberg Marquardt algorithm """
213
+
214
+ input_poses = input_poses.clone()
215
+
216
+ if fix_opt_window:
217
+ freen = torch.cat((ii_loop, jj_loop)).max().item() + 1
218
+ else:
219
+ freen = -1
220
+
221
+ Ginv = SE3_to_Sim3(input_poses).Inv().Log()
222
+
223
+ residual_history = []
224
+
225
+ for itr in range(iters):
226
+ resid, (J_Ginv_i, J_Ginv_j, iii, jjj) = residual(Ginv, input_poses, dSloop, ii_loop, jj_loop, jacobian=True)
227
+ residual_history.append(resid.square().mean().item())
228
+ # print("#Residual", residual_history[-1])
229
+ delta_pose, = cuda_ba.solve_system(J_Ginv_i, J_Ginv_j, iii, jjj, resid, ep, lmbda, freen)
230
+ assert Ginv.shape == delta_pose.shape
231
+ Ginv_tmp = Ginv + delta_pose
232
+
233
+ new_resid = residual(Ginv_tmp, input_poses, dSloop, ii_loop, jj_loop)
234
+ if new_resid.square().mean() < residual_history[-1]:
235
+ Ginv = Ginv_tmp
236
+ lmbda /= 2
237
+ else:
238
+ lmbda *= 2
239
+
240
+ if (residual_history[-1] < 1e-5) and (itr >= 4) and ((residual_history[-5] / residual_history[-1]) < 1.5):
241
+ break
242
+
243
+ return pp.Exp(Ginv).Inv()
third-party/DPVO/dpvo/loop_closure/retrieval/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from .retrieval_dbow import RetrievalDBOW
2
+ from .image_cache import ImageCache
third-party/DPVO/dpvo/loop_closure/retrieval/image_cache.py ADDED
@@ -0,0 +1,72 @@
1
+ import os
2
+ from multiprocessing import Pool
3
+ from shutil import copytree
4
+ from tempfile import TemporaryDirectory
5
+
6
+ import cv2
7
+ import kornia as K
8
+ import numpy as np
9
+ from einops import asnumpy, parse_shape, rearrange
10
+
11
+ IMEXT = '.jpeg'
12
+ JPEG_QUALITY = [int(cv2.IMWRITE_JPEG_QUALITY), 95]
13
+ BLANK = np.zeros((500,500,3), dtype=np.uint8)
14
+
15
+ class ImageCache:
16
+
17
+ def __init__(self):
18
+ self.image_buffer = {}
19
+ self.tmpdir = TemporaryDirectory()
20
+ self.stored_indices = np.zeros(100000, dtype=bool)
21
+ self.writer_pool = Pool(processes=1)
22
+ self.write_result = self.writer_pool.apply_async(cv2.imwrite, [f"{self.tmpdir.name}/warmup.png", BLANK, JPEG_QUALITY])
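+ # dummy write so the single worker process is already spawned and warm before real frames arrive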
23
+ self._wait()
24
+
25
+ def __call__(self, image, n):
26
+ assert isinstance(image, np.ndarray)
27
+ assert image.dtype == np.uint8
28
+ assert parse_shape(image, '_ _ RGB') == dict(RGB=3)
29
+ self.image_buffer[n] = image
30
+
31
+ def _wait(self):
32
+ """ Wait until the previous image is finished writing """
33
+ self.write_result.wait()
34
+
35
+ def _write_image(self, i):
36
+ """ Save the image to disk (asynchronously) """
37
+ img = self.image_buffer.pop(i)
38
+ filepath = f"{self.tmpdir.name}/{i:08d}{IMEXT}"
39
+ assert not os.path.exists(filepath)
40
+ self._wait()
41
+ self.write_result = self.writer_pool.apply_async(cv2.imwrite, [filepath, img, JPEG_QUALITY])
42
+
43
+ def load_frames(self, idxs, device='cuda'):
44
+ self._wait()
45
+ assert np.all(self.stored_indices[idxs])
46
+ frame_list = [f"{self.tmpdir.name}/{i:08d}{IMEXT}" for i in idxs]
47
+ assert all(map(os.path.exists, frame_list))
48
+ image_list = [cv2.imread(f) for f in frame_list]
49
+ return K.utils.image_list_to_tensor(image_list).to(device=device)
50
+
51
+ def keyframe(self, k):
52
+ tmp = dict(self.image_buffer)
53
+ self.image_buffer.clear()
54
+ for n, v in tmp.items():
55
+ if n != k:
56
+ key = (n-1) if (n > k) else n
57
+ self.image_buffer[key] = v
58
+
59
+ def save_up_to(self, c):
60
+ """ Pop images from the buffer and write them to disk"""
61
+ for n in list(self.image_buffer):
62
+ if n <= c:
63
+ assert not self.stored_indices[n]
64
+ self._write_image(n)
65
+ self.stored_indices[n] = True
66
+
67
+ def close(self):
68
+ self._wait()
69
+ # copytree(self.tmpdir.name, '/tmp/temp')
70
+ self.tmpdir.cleanup()
71
+ # os.rename('/tmp/temp', self.tmpdir.name)
72
+ self.writer_pool.close()
third-party/DPVO/dpvo/loop_closure/retrieval/retrieval_dbow.py ADDED
@@ -0,0 +1,125 @@
1
+ import os
2
+ import time
3
+ from multiprocessing import Process, Queue, Value
4
+ import numpy as np
5
+ from einops import parse_shape
6
+
7
+ try:
8
+ import dpretrieval
9
+ dpretrieval.DPRetrieval
10
+ except:
11
+ raise ModuleNotFoundError("Couldn't load dpretrieval. It may not be installed.")
12
+
13
+
14
+ NMS = 50 # Slow motion gets removed from keyframes anyway. So this is really the keyframe distance
15
+
16
+ RAD = 50
17
+
18
+ def _dbow_loop(in_queue, out_queue, vocab_path, ready):
19
+ """ Run DBoW retrieval """
20
+ dbow = dpretrieval.DPRetrieval(vocab_path, 50)
21
+ ready.value = 1
22
+ while True:
23
+ n, image = in_queue.get()
24
+ dbow.insert_image(image)
25
+ q = dbow.query(n)
26
+ out_queue.put((n, q))
27
+
28
+ class RetrievalDBOW:
29
+
30
+ def __init__(self, vocab_path="ORBvoc.txt"):
31
+ if not os.path.exists(vocab_path):
32
+ raise FileNotFoundError("""Missing the ORB vocabulary. Please download and un-tar it from """
33
+ """https://github.com/UZ-SLAMLab/ORB_SLAM3/blob/master/Vocabulary/ORBvoc.txt.tar.gz"""
34
+ f""" and place it in DPVO/""")
35
+
36
+ # Store a record of saved and unsaved images
37
+ self.image_buffer = {}
38
+ self.stored_indices = np.zeros(100000, dtype=bool)
39
+
40
+ # Keep track of detected and closed loops
41
+ self.prev_loop_closes = []
42
+ self.found = []
43
+
44
+ # Run DBoW in a separate process
45
+ self.in_queue = Queue(maxsize=20)
46
+ self.out_queue = Queue(maxsize=20)
47
+ ready = Value('i', 0)
48
+ self.proc = Process(target=_dbow_loop, args=(self.in_queue, self.out_queue, vocab_path, ready))
49
+ self.proc.start()
50
+ self.being_processed = 0
51
+ while not ready.value:
52
+ time.sleep(0.01)
53
+
54
+ def keyframe(self, k):
55
+ """ Once we keyframe an image, we can safely cache all images
56
+ before & including it """
57
+ tmp = dict(self.image_buffer)
58
+ self.image_buffer.clear()
59
+ for n, v in tmp.items():
60
+ if n != k:
61
+ key = (n-1) if (n > k) else n
62
+ self.image_buffer[key] = v
63
+
64
+ def save_up_to(self, c):
65
+ """ Add frames to the image-retrieval database """
66
+ for n in list(self.image_buffer):
67
+ if n <= c:
68
+ assert not self.stored_indices[n]
69
+ img = self.image_buffer.pop(n)
70
+ self.in_queue.put((n, img))
71
+ self.stored_indices[n] = True
72
+ self.being_processed += 1
73
+
74
+ def confirm_loop(self, i, j):
75
+ """ Record the loop closure so we don't have redundant edges"""
76
+ assert i > j
77
+ self.prev_loop_closes.append((i, j))
78
+
79
+ def _repetition_check(self, idx, num_repeat):
80
+ """ Check that we've retrieved <num_repeat> consecutive frames """
81
+ if (len(self.found) < num_repeat):
82
+ return
83
+ latest = self.found[-num_repeat:]
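+ # note: the unpacking below assumes num_repeat == 3 (three consecutive retrievals)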
84
+ (b, _), (i, j), _ = latest
85
+ if (1 + idx - b) == num_repeat:
86
+ return (i, max(j,1)) # max(j,1) is to avoid centering the triplet on 0
87
+
88
+ def detect_loop(self, thresh, num_repeat=1):
89
+ """ Keep popping off the queue until the it is empty
90
+ or we find a positive pair """
91
+ while self.being_processed > 0:
92
+ x = self._detect_loop(thresh, num_repeat)
93
+ if x is not None:
94
+ return x
95
+
96
+ def _detect_loop(self, thresh, num_repeat=1):
97
+ """ Pop retrived pairs off the queue. Return if they have non-trivial score """
98
+ assert self.being_processed > 0
99
+ i, (score, j, _) = self.out_queue.get()
100
+ self.being_processed -= 1
101
+ if score < thresh:
102
+ return
103
+ assert i > j
104
+
105
+ # Ensure that this edge is not redundant
106
+ dists_sq = [(np.square(i - a) + np.square(j - b)) for a,b in self.prev_loop_closes]
107
+ if min(dists_sq, default=np.inf) < np.square(NMS):
108
+ return
109
+
110
+ # Add this frame pair to the list of retrieved matches
111
+ self.found.append((i, j))
112
+
113
+ # Check that we've retrieved <num_repeat> consecutive frames
114
+ return self._repetition_check(i, num_repeat)
115
+
116
+ def __call__(self, image, n):
117
+ """ Store the image into the frame buffer """
118
+ assert isinstance(image, np.ndarray)
119
+ assert image.dtype == np.uint8
120
+ assert parse_shape(image, '_ _ RGB') == dict(RGB=3)
121
+ self.image_buffer[n] = image
122
+
123
+ def close(self):
124
+ self.proc.terminate()
125
+ self.proc.join()
third-party/DPVO/dpvo/net.py ADDED
@@ -0,0 +1,273 @@
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ from collections import OrderedDict
6
+
7
+ import torch_scatter
8
+ from torch_scatter import scatter_sum
9
+
10
+ from . import fastba
11
+ from . import altcorr
12
+ from . import lietorch
13
+ from .lietorch import SE3
14
+
15
+ from .extractor import BasicEncoder, BasicEncoder4
16
+ from .blocks import GradientClip, GatedResidual, SoftAgg
17
+
18
+ from .utils import *
19
+ from .ba import BA
20
+ from . import projective_ops as pops
21
+
22
+ autocast = torch.cuda.amp.autocast
23
+ import matplotlib.pyplot as plt
24
+
25
+ DIM = 384
26
+
27
+ class Update(nn.Module):
28
+ def __init__(self, p):
29
+ super(Update, self).__init__()
30
+
31
+ self.c1 = nn.Sequential(
32
+ nn.Linear(DIM, DIM),
33
+ nn.ReLU(inplace=True),
34
+ nn.Linear(DIM, DIM))
35
+
36
+ self.c2 = nn.Sequential(
37
+ nn.Linear(DIM, DIM),
38
+ nn.ReLU(inplace=True),
39
+ nn.Linear(DIM, DIM))
40
+
41
+ self.norm = nn.LayerNorm(DIM, eps=1e-3)
42
+
43
+ self.agg_kk = SoftAgg(DIM)
44
+ self.agg_ij = SoftAgg(DIM)
45
+
46
+ self.gru = nn.Sequential(
47
+ nn.LayerNorm(DIM, eps=1e-3),
48
+ GatedResidual(DIM),
49
+ nn.LayerNorm(DIM, eps=1e-3),
50
+ GatedResidual(DIM),
51
+ )
52
+
53
+ self.corr = nn.Sequential(
54
+ nn.Linear(2*49*p*p, DIM),
55
+ nn.ReLU(inplace=True),
56
+ nn.Linear(DIM, DIM),
57
+ nn.LayerNorm(DIM, eps=1e-3),
58
+ nn.ReLU(inplace=True),
59
+ nn.Linear(DIM, DIM),
60
+ )
61
+
62
+ self.d = nn.Sequential(
63
+ nn.ReLU(inplace=False),
64
+ nn.Linear(DIM, 2),
65
+ GradientClip())
66
+
67
+ self.w = nn.Sequential(
68
+ nn.ReLU(inplace=False),
69
+ nn.Linear(DIM, 2),
70
+ GradientClip(),
71
+ nn.Sigmoid())
72
+
73
+
74
+ def forward(self, net, inp, corr, flow, ii, jj, kk):
75
+ """ update operator """
76
+
77
+ net = net + inp + self.corr(corr)
78
+ net = self.norm(net)
79
+
80
+ ix, jx = fastba.neighbors(kk, jj)
81
+ mask_ix = (ix >= 0).float().reshape(1, -1, 1)
82
+ mask_jx = (jx >= 0).float().reshape(1, -1, 1)
83
+
84
+ net = net + self.c1(mask_ix * net[:,ix])
85
+ net = net + self.c2(mask_jx * net[:,jx])
86
+
87
+ net = net + self.agg_kk(net, kk)
88
+ net = net + self.agg_ij(net, ii*12345 + jj)
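+ # ii*12345 + jj packs each (ii, jj) frame pair into a single scalar key (unique while jj < 12345) for per-edge soft aggregation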
89
+
90
+ net = self.gru(net)
91
+
92
+ return net, (self.d(net), self.w(net), None)
93
+
94
+
95
+ class Patchifier(nn.Module):
96
+ def __init__(self, patch_size=3):
97
+ super(Patchifier, self).__init__()
98
+ self.patch_size = patch_size
99
+ self.fnet = BasicEncoder4(output_dim=128, norm_fn='instance')
100
+ self.inet = BasicEncoder4(output_dim=DIM, norm_fn='none')
101
+
102
+ def __image_gradient(self, images):
103
+ gray = ((images + 0.5) * (255.0 / 2)).sum(dim=2)
104
+ dx = gray[...,:-1,1:] - gray[...,:-1,:-1]
105
+ dy = gray[...,1:,:-1] - gray[...,:-1,:-1]
106
+ g = torch.sqrt(dx**2 + dy**2)
107
+ g = F.avg_pool2d(g, 4, 4)
108
+ return g
109
+
110
+ def forward(self, images, patches_per_image=80, disps=None, centroid_sel_strat='RANDOM', return_color=False):
111
+ """ extract patches from input images """
112
+ fmap = self.fnet(images) / 4.0
113
+ imap = self.inet(images) / 4.0
114
+
115
+ b, n, c, h, w = fmap.shape
116
+ P = self.patch_size
117
+
118
+ # bias patch selection towards regions with high gradient
119
+ if centroid_sel_strat == 'GRADIENT_BIAS':
120
+ g = self.__image_gradient(images)
121
+ x = torch.randint(1, w-1, size=[n, 3*patches_per_image], device="cuda")
122
+ y = torch.randint(1, h-1, size=[n, 3*patches_per_image], device="cuda")
123
+
124
+ coords = torch.stack([x, y], dim=-1).float()
125
+ g = altcorr.patchify(g[0,:,None], coords, 0).view(n, 3 * patches_per_image)
126
+
127
+ ix = torch.argsort(g, dim=1)
128
+ x = torch.gather(x, 1, ix[:, -patches_per_image:])
129
+ y = torch.gather(y, 1, ix[:, -patches_per_image:])
130
+
131
+ elif centroid_sel_strat == 'RANDOM':
132
+ x = torch.randint(1, w-1, size=[n, patches_per_image], device="cuda")
133
+ y = torch.randint(1, h-1, size=[n, patches_per_image], device="cuda")
134
+
135
+ else:
136
+ raise NotImplementedError(f"Patch centroid selection not implemented: {centroid_sel_strat}")
137
+
138
+ coords = torch.stack([x, y], dim=-1).float()
139
+ imap = altcorr.patchify(imap[0], coords, 0).view(b, -1, DIM, 1, 1)
140
+ gmap = altcorr.patchify(fmap[0], coords, P//2).view(b, -1, 128, P, P)
141
+
142
+ if return_color:
143
+ clr = altcorr.patchify(images[0], 4*(coords + 0.5), 0).view(b, -1, 3)
144
+
145
+ if disps is None:
146
+ disps = torch.ones(b, n, h, w, device="cuda")
147
+
148
+ grid, _ = coords_grid_with_index(disps, device=fmap.device)
149
+ patches = altcorr.patchify(grid[0], coords, P//2).view(b, -1, 3, P, P)
150
+
151
+ index = torch.arange(n, device="cuda").view(n, 1)
152
+ index = index.repeat(1, patches_per_image).reshape(-1)
153
+
154
+ if return_color:
155
+ return fmap, gmap, imap, patches, index, clr
156
+
157
+ return fmap, gmap, imap, patches, index
158
+
159
+
160
+ class CorrBlock:
161
+ def __init__(self, fmap, gmap, radius=3, dropout=0.2, levels=[1,4]):
162
+ self.dropout = dropout
163
+ self.radius = radius
164
+ self.levels = levels
165
+
166
+ self.gmap = gmap
167
+ self.pyramid = pyramidify(fmap, lvls=levels)
168
+
169
+ def __call__(self, ii, jj, coords):
170
+ corrs = []
171
+ for i in range(len(self.levels)):
172
+ corrs += [ altcorr.corr(self.gmap, self.pyramid[i], coords / self.levels[i], ii, jj, self.radius, self.dropout) ]
173
+ return torch.stack(corrs, -1).view(1, len(ii), -1)
174
+
175
+
176
+ class VONet(nn.Module):
177
+ def __init__(self, use_viewer=False):
178
+ super(VONet, self).__init__()
179
+ self.P = 3
180
+ self.patchify = Patchifier(self.P)
181
+ self.update = Update(self.P)
182
+
183
+ self.DIM = DIM
184
+ self.RES = 4
185
+
186
+
187
+ @autocast(enabled=False)
188
+ def forward(self, images, poses, disps, intrinsics, M=1024, STEPS=12, P=1, structure_only=False, rescale=False):
189
+ """ training forward pass: unrolls the update operator and bundle adjustment over a clip of frames """
190
+
191
+ images = 2 * (images / 255.0) - 0.5
192
+ intrinsics = intrinsics / 4.0
193
+ disps = disps[:, :, 1::4, 1::4].float()
194
+
195
+ fmap, gmap, imap, patches, ix = self.patchify(images, disps=disps)
196
+
197
+ corr_fn = CorrBlock(fmap, gmap)
198
+
199
+ b, N, c, h, w = fmap.shape
200
+ p = self.P
201
+
202
+ patches_gt = patches.clone()
203
+ Ps = poses
204
+
205
+ d = patches[..., 2, p//2, p//2]
206
+ patches = set_depth(patches, torch.rand_like(d))
207
+
208
+ kk, jj = flatmeshgrid(torch.where(ix < 8)[0], torch.arange(0,8, device="cuda"), indexing='ij')
209
+ ii = ix[kk]
210
+
211
+ imap = imap.view(b, -1, DIM)
212
+ net = torch.zeros(b, len(kk), DIM, device="cuda", dtype=torch.float)
213
+
214
+ Gs = SE3.IdentityLike(poses)
215
+
216
+ if structure_only:
217
+ Gs.data[:] = poses.data[:]
218
+
219
+ traj = []
220
+ bounds = [-64, -64, w + 64, h + 64]
221
+
222
+ while len(traj) < STEPS:
223
+ Gs = Gs.detach()
224
+ patches = patches.detach()
225
+
226
+ n = ii.max() + 1
227
+ if len(traj) >= 8 and n < images.shape[1]:
228
+ if not structure_only: Gs.data[:,n] = Gs.data[:,n-1]
229
+ kk1, jj1 = flatmeshgrid(torch.where(ix < n)[0], torch.arange(n, n+1, device="cuda"), indexing='ij')
230
+ kk2, jj2 = flatmeshgrid(torch.where(ix == n)[0], torch.arange(0, n+1, device="cuda"), indexing='ij')
231
+
232
+ ii = torch.cat([ix[kk1], ix[kk2], ii])
233
+ jj = torch.cat([jj1, jj2, jj])
234
+ kk = torch.cat([kk1, kk2, kk])
235
+
236
+ net1 = torch.zeros(b, len(kk1) + len(kk2), DIM, device="cuda")
237
+ net = torch.cat([net1, net], dim=1)
238
+
239
+ if np.random.rand() < 0.1:
240
+ k = (ii != (n - 4)) & (jj != (n - 4))
241
+ ii = ii[k]
242
+ jj = jj[k]
243
+ kk = kk[k]
244
+ net = net[:,k]
245
+
246
+ patches[:,ix==n,2] = torch.median(patches[:,(ix == n-1) | (ix == n-2),2])
247
+ n = ii.max() + 1
248
+
249
+ coords = pops.transform(Gs, patches, intrinsics, ii, jj, kk)
250
+ coords1 = coords.permute(0, 1, 4, 2, 3).contiguous()
251
+
252
+ corr = corr_fn(kk, jj, coords1)
253
+ net, (delta, weight, _) = self.update(net, imap[:,kk], corr, None, ii, jj, kk)
254
+
255
+ lmbda = 1e-4
256
+ target = coords[...,p//2,p//2,:] + delta
257
+
258
+ ep = 10
259
+ for itr in range(2):
260
+ Gs, patches = BA(Gs, patches, intrinsics, target, weight, lmbda, ii, jj, kk,
261
+ bounds, ep=ep, fixedp=1, structure_only=structure_only)
262
+
263
+ kl = torch.as_tensor(0)
264
+ dij = (ii - jj).abs()
265
+ k = (dij > 0) & (dij <= 2)
266
+
267
+ coords = pops.transform(Gs, patches, intrinsics, ii[k], jj[k], kk[k])
268
+ coords_gt, valid, _ = pops.transform(Ps, patches_gt, intrinsics, ii[k], jj[k], kk[k], jacobian=True)
269
+
270
+ traj.append((valid, coords, coords_gt, Gs[:,:n], Ps[:,:n], kl))
271
+
272
+ return traj
273
+
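A note on the magic number 2*49*p*p in Update.corr: CorrBlock correlates each of the p x p patch pixels against a (2*radius+1)^2 = 49 pixel window on len(levels) = 2 pyramid levels (radius=3 and levels=[1,4] are the defaults above), so the flattened correlation feature per edge has exactly that width. A tiny standalone check; DIM = 384 is an assumption here, since DIM is defined earlier in net.py and not visible in this hunk:

import torch

radius, levels, p, DIM = 3, [1, 4], 3, 384           # defaults used above; DIM assumed 384

corr_dim = len(levels) * (2 * radius + 1) ** 2 * p * p
print(corr_dim)                                      # 882 == 2*49*p*p, the in_features of Update.corr

# the first layer of Update.corr therefore expects (batch, num_edges, corr_dim) inputs
layer = torch.nn.Linear(corr_dim, DIM)
print(layer(torch.randn(1, 5, corr_dim)).shape)      # torch.Size([1, 5, 384])
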
third-party/DPVO/dpvo/patchgraph.py ADDED
@@ -0,0 +1,111 @@
1
+ import numpy as np
2
+ import torch
3
+ from einops import asnumpy, reduce, repeat
4
+
5
+ from . import projective_ops as pops
6
+ from .lietorch import SE3
7
+ from .loop_closure.optim_utils import reduce_edges
8
+ from .utils import *
9
+
10
+
11
+ class PatchGraph:
12
+ """ Stores the patch-graph state: frame poses, patches, intrinsics and edge lists """
13
+
14
+ def __init__(self, cfg, P, DIM, pmem, **kwargs):
15
+ self.cfg = cfg
16
+ self.P = P
17
+ self.pmem = pmem
18
+ self.DIM = DIM
19
+
20
+ self.n = 0 # number of frames
21
+ self.m = 0 # number of patches
22
+
23
+ self.M = self.cfg.PATCHES_PER_FRAME
24
+ self.N = self.cfg.BUFFER_SIZE
25
+
26
+ self.tstamps_ = np.zeros(self.N, dtype=np.int64)
27
+ self.poses_ = torch.zeros(self.N, 7, dtype=torch.float, device="cuda")
28
+ self.patches_ = torch.zeros(self.N, self.M, 3, self.P, self.P, dtype=torch.float, device="cuda")
29
+ self.intrinsics_ = torch.zeros(self.N, 4, dtype=torch.float, device="cuda")
30
+
31
+ self.points_ = torch.zeros(self.N * self.M, 3, dtype=torch.float, device="cuda")
32
+ self.colors_ = torch.zeros(self.N, self.M, 3, dtype=torch.uint8, device="cuda")
33
+
34
+ self.index_ = torch.zeros(self.N, self.M, dtype=torch.long, device="cuda")
35
+ self.index_map_ = torch.zeros(self.N, dtype=torch.long, device="cuda")
36
+
37
+ # initialize poses to identity matrix
38
+ self.poses_[:,6] = 1.0
39
+
40
+ # store relative poses for removed frames
41
+ self.delta = {}
42
+
43
+ ### edge information ###
44
+ self.net = torch.zeros(1, 0, DIM, **kwargs)
45
+ self.ii = torch.as_tensor([], dtype=torch.long, device="cuda")
46
+ self.jj = torch.as_tensor([], dtype=torch.long, device="cuda")
47
+ self.kk = torch.as_tensor([], dtype=torch.long, device="cuda")
48
+
49
+ ### inactive edge information (i.e., no longer updated, but useful for BA) ###
50
+ self.ii_inac = torch.as_tensor([], dtype=torch.long, device="cuda")
51
+ self.jj_inac = torch.as_tensor([], dtype=torch.long, device="cuda")
52
+ self.kk_inac = torch.as_tensor([], dtype=torch.long, device="cuda")
53
+ self.weight_inac = torch.zeros(1, 0, 2, dtype=torch.long, device="cuda")
54
+ self.target_inac = torch.zeros(1, 0, 2, dtype=torch.long, device="cuda")
55
+
56
+ def edges_loop(self):
57
+ """ Add loop-closure edges from old patches to recent frames """
58
+ lc_range = self.cfg.MAX_EDGE_AGE
59
+ l = self.n - self.cfg.REMOVAL_WINDOW # l is the upper bound for "old" patches
60
+
61
+ if l <= 0:
62
+ return torch.empty(2, 0, dtype=torch.long, device='cuda')
63
+
64
+ # create candidate edges
65
+ jj, kk = flatmeshgrid(
66
+ torch.arange(self.n - self.cfg.GLOBAL_OPT_FREQ, self.n - self.cfg.KEYFRAME_INDEX, device="cuda"),
67
+ torch.arange(max(l - lc_range, 0) * self.M, l * self.M, device="cuda"), indexing='ij')
68
+ ii = self.ix[kk]
69
+
70
+ # Remove candidate edges whose flow magnitude is too large
71
+ flow_mg, val = pops.flow_mag(SE3(self.poses), self.patches[...,1,1].view(1,-1,3,1,1), self.intrinsics, ii, jj, kk, beta=0.5)
72
+ flow_mg_sum = reduce(flow_mg * val, '1 (fl M) 1 1 -> fl', 'sum', M=self.M).float()
73
+ num_val = reduce(val, '1 (fl M) 1 1 -> fl', 'sum', M=self.M).clamp(min=1)
74
+ flow_mag = torch.where(num_val > (self.M * 0.75), flow_mg_sum / num_val, torch.inf)
75
+
76
+ mask = (flow_mag < self.cfg.BACKEND_THRESH)
77
+ es = reduce_edges(asnumpy(flow_mag[mask]), asnumpy(ii[::self.M][mask]), asnumpy(jj[::self.M][mask]), max_num_edges=1000, nms=1)
78
+
79
+ edges = torch.as_tensor(es, device=ii.device)
80
+ ii, jj = repeat(edges, 'E ij -> ij E M', M=self.M, ij=2)
81
+ kk = ii.mul(self.M) + torch.arange(self.M, device=ii.device)
82
+ return kk.flatten(), jj.flatten()
83
+
84
+ def normalize(self):
85
+ """ normalize depth and poses """
86
+ s = self.patches_[:self.n,:,2].mean()
87
+ self.patches_[:self.n,:,2] /= s
88
+ self.poses_[:self.n,:3] *= s
89
+ for t, (t0, dP) in self.delta.items():
90
+ self.delta[t] = (t0, dP.scale(s))
91
+ self.poses_[:self.n] = (SE3(self.poses_[:self.n]) * SE3(self.poses_[[0]]).inv()).data
92
+
93
+ points = pops.point_cloud(SE3(self.poses), self.patches[:, :self.m], self.intrinsics, self.ix[:self.m])
94
+ points = (points[...,1,1,:3] / points[...,1,1,3:]).reshape(-1, 3)
95
+ self.points_[:len(points)] = points[:]
96
+
97
+ @property
98
+ def poses(self):
99
+ return self.poses_.view(1, self.N, 7)
100
+
101
+ @property
102
+ def patches(self):
103
+ return self.patches_.view(1, self.N*self.M, 3, 3, 3)
104
+
105
+ @property
106
+ def intrinsics(self):
107
+ return self.intrinsics_.view(1, self.N, 4)
108
+
109
+ @property
110
+ def ix(self):
111
+ return self.index_.view(-1)
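For readers tracing the indexing: patches are stored M per frame, so the flat view ix maps any flat patch id back to its source frame. A standalone illustration with toy sizes (N, M and kk below are made up, not the class defaults):

import torch

N, M = 5, 3                                        # toy buffer: 5 frames, 3 patches per frame
index_ = torch.arange(N).view(N, 1).repeat(1, M)   # frame id of every patch, as DPVO fills it
ix = index_.view(-1)                               # equivalent of PatchGraph.ix

kk = torch.tensor([0, 4, 14])                      # flat patch ids
print(ix[kk])                                      # tensor([0, 1, 4])
assert torch.equal(ix[kk], kk // M)                # holds because patches are added M per frame
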
third-party/DPVO/dpvo/plot_utils.py ADDED
@@ -0,0 +1,64 @@
1
+ from pathlib import Path
2
+
3
+ import matplotlib.pyplot as plt
4
+ import numpy as np
5
+ from evo.core import sync
6
+ from evo.core.trajectory import PoseTrajectory3D
7
+ from evo.tools import plot
8
+ from plyfile import PlyData, PlyElement
9
+
10
+
11
+ def plot_trajectory(pred_traj, gt_traj=None, title="", filename="", align=True, correct_scale=True):
12
+ assert isinstance(pred_traj, PoseTrajectory3D)
13
+
14
+ if gt_traj is not None:
15
+ assert isinstance(gt_traj, PoseTrajectory3D)
16
+ gt_traj, pred_traj = sync.associate_trajectories(gt_traj, pred_traj)
17
+
18
+ if align:
19
+ pred_traj.align(gt_traj, correct_scale=correct_scale)
20
+
21
+ plot_collection = plot.PlotCollection("PlotCol")
22
+ fig = plt.figure(figsize=(8, 8))
23
+ plot_mode = plot.PlotMode.xz # ideal for planar movement
24
+ ax = plot.prepare_axis(fig, plot_mode)
25
+ ax.set_title(title)
26
+ if gt_traj is not None:
27
+ plot.traj(ax, plot_mode, gt_traj, '--', 'gray', "Ground Truth")
28
+ plot.traj(ax, plot_mode, pred_traj, '-', 'blue', "Predicted")
29
+ plot_collection.add_figure("traj (error)", fig)
30
+ plot_collection.export(filename, confirm_overwrite=False)
31
+ plt.close(fig=fig)
32
+ print(f"Saved {filename}")
33
+
34
+ def save_output_for_COLMAP(name: str, traj: PoseTrajectory3D, points: np.ndarray, colors: np.ndarray, fx, fy, cx, cy, H=480, W=640):
35
+ """ Saves the sparse point cloud and camera poses such that it can be opened in COLMAP """
36
+
37
+ colmap_dir = Path(name)
38
+ colmap_dir.mkdir(exist_ok=True)
39
+ scale = 10 # for visualization
40
+
41
+ # images
42
+ images = ""
43
+ traj = PoseTrajectory3D(poses_se3=list(map(np.linalg.inv, traj.poses_se3)), timestamps=traj.timestamps)
44
+ for idx, (x,y,z), (qw, qx, qy, qz) in zip(range(1,traj.num_poses+1), traj.positions_xyz*scale, traj.orientations_quat_wxyz):
45
+ images += f"{idx} {qw} {qx} {qy} {qz} {x} {y} {z} 1\n\n"
46
+ (colmap_dir / "images.txt").write_text(images)
47
+
48
+ # points
49
+ points3D = ""
50
+ colors_uint = (colors * 255).astype(np.uint8).tolist()
51
+ for i, (p,c) in enumerate(zip((points*scale).tolist(), colors_uint), start=1):
52
+ points3D += f"{i} " + ' '.join(map(str, p + c)) + " 0.0 0 0 0 0 0 0\n"
53
+ (colmap_dir / "points3D.txt").write_text(points3D)
54
+
55
+ # camera
56
+ (colmap_dir / "cameras.txt").write_text(f"1 PINHOLE {W} {H} {fx} {fy} {cx} {cy}")
57
+ print(f"Saved COLMAP-compatible reconstruction in {colmap_dir.resolve()}")
58
+
59
+ def save_ply(name: str, points: np.ndarray, colors: np.ndarray):
60
+ points_ply = np.array([(x,y,z,r,g,b) for (x,y,z),(r,g,b) in zip(points, colors)],
61
+ dtype=[('x', '<f4'), ('y', '<f4'), ('z', '<f4'),('red', 'u1'), ('green', 'u1'),('blue', 'u1')])
62
+ el = PlyElement.describe(points_ply, 'vertex',{'some_property': 'f8'},{'some_property': 'u4'})
63
+ PlyData([el], text=True).write(f"{name}.ply")
64
+ print(f"Saved {name}.ply")
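A minimal usage sketch for the PLY export above, with random data (assumes the dpvo package and plyfile are importable; the array contents are made up):

import numpy as np
from dpvo.plot_utils import save_ply

points = np.random.randn(100, 3).astype(np.float32)            # dummy point cloud
colors = np.random.randint(0, 255, (100, 3), dtype=np.uint8)   # dummy RGB values in [0, 255]
save_ply("demo_cloud", points, colors)                         # writes demo_cloud.ply
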
third-party/DPVO/dpvo/projective_ops.py ADDED
@@ -0,0 +1,130 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+
4
+ from .lietorch import SE3, Sim3
5
+
6
+ MIN_DEPTH = 0.2
7
+
8
+ def extract_intrinsics(intrinsics):
9
+ return intrinsics[...,None,None,:].unbind(dim=-1)
10
+
11
+ def coords_grid(ht, wd, **kwargs):
12
+ y, x = torch.meshgrid(
13
+ torch.arange(ht).to(**kwargs).float(),
14
+ torch.arange(wd).to(**kwargs).float())
15
+
16
+ return torch.stack([x, y], dim=-1)
17
+
18
+
19
+ def iproj(patches, intrinsics):
20
+ """ inverse projection """
21
+ x, y, d = patches.unbind(dim=2)
22
+ fx, fy, cx, cy = intrinsics[...,None,None].unbind(dim=2)
23
+
24
+ i = torch.ones_like(d)
25
+ xn = (x - cx) / fx
26
+ yn = (y - cy) / fy
27
+
28
+ X = torch.stack([xn, yn, i, d], dim=-1)
29
+ return X
30
+
31
+
32
+ def proj(X, intrinsics, depth=False):
33
+ """ projection """
34
+
35
+ X, Y, Z, W = X.unbind(dim=-1)
36
+ fx, fy, cx, cy = intrinsics[...,None,None].unbind(dim=2)
37
+
38
+ # d = 0.01 * torch.ones_like(Z)
39
+ # d[Z > 0.01] = 1.0 / Z[Z > 0.01]
40
+ # d = torch.ones_like(Z)
41
+ # d[Z.abs() > 0.1] = 1.0 / Z[Z.abs() > 0.1]
42
+
43
+ d = 1.0 / Z.clamp(min=0.1)
44
+ x = fx * (d * X) + cx
45
+ y = fy * (d * Y) + cy
46
+
47
+ if depth:
48
+ return torch.stack([x, y, d], dim=-1)
49
+
50
+ return torch.stack([x, y], dim=-1)
51
+
52
+
53
+ def transform(poses, patches, intrinsics, ii, jj, kk, depth=False, valid=False, jacobian=False, tonly=False):
54
+ """ projective transform """
55
+
56
+ # backproject
57
+ X0 = iproj(patches[:,kk], intrinsics[:,ii])
58
+
59
+ # transform
60
+ Gij = poses[:, jj] * poses[:, ii].inv()
61
+
62
+ if tonly:
63
+ Gij[...,3:] = torch.as_tensor([0,0,0,1], device=Gij.device)
64
+
65
+ X1 = Gij[:,:,None,None] * X0
66
+
67
+ # project
68
+ x1 = proj(X1, intrinsics[:,jj], depth)
69
+
70
+
71
+ if jacobian:
72
+ p = X1.shape[2]
73
+ X, Y, Z, H = X1[...,p//2,p//2,:].unbind(dim=-1)
74
+ o = torch.zeros_like(H)
75
+ i = torch.zeros_like(H)
76
+
77
+ fx, fy, cx, cy = intrinsics[:,jj].unbind(dim=-1)
78
+
79
+ d = torch.zeros_like(Z)
80
+ d[Z.abs() > 0.2] = 1.0 / Z[Z.abs() > 0.2]
81
+
82
+ if isinstance(Gij, SE3):
83
+ Ja = torch.stack([
84
+ H, o, o, o, Z, -Y,
85
+ o, H, o, -Z, o, X,
86
+ o, o, H, Y, -X, o,
87
+ o, o, o, o, o, o,
88
+ ], dim=-1).view(1, len(ii), 4, 6)
89
+
90
+ elif isinstance(Gij, Sim3):
91
+ Ja = torch.stack([
92
+ H, o, o, o, Z, -Y, X,
93
+ o, H, o, -Z, o, X, Y,
94
+ o, o, H, Y, -X, o, Z,
95
+ o, o, o, o, o, o, o,
96
+ ], dim=-1).view(1, len(ii), 4, 7)
97
+
98
+ Jp = torch.stack([
99
+ fx*d, o, -fx*X*d*d, o,
100
+ o, fy*d, -fy*Y*d*d, o,
101
+ ], dim=-1).view(1, len(ii), 2, 4)
102
+
103
+ Jj = torch.matmul(Jp, Ja)
104
+ Ji = -Gij[:,:,None].adjT(Jj)
105
+
106
+ Jz = torch.matmul(Jp, Gij.matrix()[...,:,3:])
107
+
108
+ return x1, (Z > 0.2).float(), (Ji, Jj, Jz)
109
+
110
+ if valid:
111
+ return x1, (X1[...,2] > 0.2).float()
112
+
113
+ return x1
114
+
115
+ def point_cloud(poses, patches, intrinsics, ix):
116
+ """ generate point cloud from patches """
117
+ return poses[:,ix,None,None].inv() * iproj(patches, intrinsics[:,ix])
118
+
119
+
120
+ def flow_mag(poses, patches, intrinsics, ii, jj, kk, beta=0.3):
121
+ """ per-patch flow magnitude, blending full and translation-only motion """
122
+
123
+ coords0 = transform(poses, patches, intrinsics, ii, ii, kk)
124
+ coords1, val = transform(poses, patches, intrinsics, ii, jj, kk, tonly=False, valid=True)
125
+ coords2 = transform(poses, patches, intrinsics, ii, jj, kk, tonly=True)
126
+
127
+ flow1 = (coords1 - coords0).norm(dim=-1)
128
+ flow2 = (coords2 - coords0).norm(dim=-1)
129
+
130
+ return beta * flow1 + (1-beta) * flow2, (val > 0.5)
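The pinhole model behind iproj/proj, written out as a standalone round trip: with an identity pose, projecting the back-projected point must return the original pixel. The intrinsics and pixel values below are arbitrary.

import torch

fx, fy, cx, cy = 320.0, 320.0, 160.0, 120.0
x, y, d = 200.0, 90.0, 0.5                  # pixel coordinates and inverse depth

# iproj: pixel + inverse depth -> homogeneous point [X, Y, 1, d]
X = torch.tensor([(x - cx) / fx, (y - cy) / fy, 1.0, d])

# proj: divide by Z (third component) and re-apply the intrinsics
Z = X[2]
x1 = fx * (X[0] / Z) + cx
y1 = fy * (X[1] / Z) + cy
print(x1.item(), y1.item())                 # 200.0 90.0
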
third-party/DPVO/dpvo/stream.py ADDED
@@ -0,0 +1,89 @@
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ from multiprocessing import Process, Queue
5
+ from pathlib import Path
6
+ from itertools import chain
7
+
8
+ def image_stream(queue, imagedir, calib, stride, skip=0):
9
+ """ image generator """
10
+
11
+ calib = np.loadtxt(calib, delimiter=" ")
12
+ fx, fy, cx, cy = calib[:4]
13
+
14
+ K = np.eye(3)
15
+ K[0,0] = fx
16
+ K[0,2] = cx
17
+ K[1,1] = fy
18
+ K[1,2] = cy
19
+
20
+ img_exts = ["*.png", "*.jpeg", "*.jpg"]
21
+ image_list = sorted(chain.from_iterable(Path(imagedir).glob(e) for e in img_exts))[skip::stride]
22
+ assert os.path.exists(imagedir), imagedir
23
+
24
+ for t, imfile in enumerate(image_list):
25
+ image = cv2.imread(str(imfile))
26
+ if len(calib) > 4:
27
+ image = cv2.undistort(image, K, calib[4:])
28
+
29
+ if 0:
30
+ image = cv2.resize(image, None, fx=0.5, fy=0.5)
31
+ intrinsics = np.array([fx / 2, fy / 2, cx / 2, cy / 2])
32
+
33
+ else:
34
+ intrinsics = np.array([fx, fy, cx, cy])
35
+
36
+ h, w, _ = image.shape
37
+ image = image[:h-h%16, :w-w%16]
38
+
39
+ queue.put((t, image, intrinsics))
40
+
41
+ queue.put((-1, image, intrinsics))
42
+
43
+
44
+ def video_stream(queue, imagedir, calib, stride, skip=0):
45
+ """ video generator """
46
+
47
+ calib = np.loadtxt(calib, delimiter=" ")
48
+ fx, fy, cx, cy = calib[:4]
49
+
50
+ K = np.eye(3)
51
+ K[0,0] = fx
52
+ K[0,2] = cx
53
+ K[1,1] = fy
54
+ K[1,2] = cy
55
+
56
+ assert os.path.exists(imagedir), imagedir
57
+ cap = cv2.VideoCapture(imagedir)
58
+
59
+ t = 0
60
+
61
+ for _ in range(skip):
62
+ ret, image = cap.read()
63
+
64
+ while True:
65
+ # Capture frame-by-frame
66
+ for _ in range(stride):
67
+ ret, image = cap.read()
68
+ # if frame is read correctly ret is True
69
+ if not ret:
70
+ break
71
+
72
+ if not ret:
73
+ break
74
+
75
+ if len(calib) > 4:
76
+ image = cv2.undistort(image, K, calib[4:])
77
+
78
+ image = cv2.resize(image, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
79
+ h, w, _ = image.shape
80
+ image = image[:h-h%16, :w-w%16]
81
+
82
+ intrinsics = np.array([fx*.5, fy*.5, cx*.5, cy*.5])
83
+ queue.put((t, image, intrinsics))
84
+
85
+ t += 1
86
+
87
+ queue.put((-1, image, intrinsics))
88
+ cap.release()
89
+
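Both readers push (t, image, intrinsics) tuples into a multiprocessing queue and finish with a t = -1 sentinel, which is how evaluate_euroc.py consumes them. A minimal consumer sketch (the image directory and calibration file paths are placeholders):

from multiprocessing import Process, Queue
from dpvo.stream import image_stream

if __name__ == '__main__':
    queue = Queue(maxsize=8)
    reader = Process(target=image_stream,
                     args=(queue, 'path/to/images', 'path/to/calib.txt', 1, 0))
    reader.start()
    while True:
        t, image, intrinsics = queue.get()
        if t < 0:                            # sentinel: reader is done
            break
        print(t, image.shape, intrinsics)
    reader.join()
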
third-party/DPVO/dpvo/utils.py ADDED
@@ -0,0 +1,88 @@
1
+ from contextlib import ContextDecorator
2
+ import torch
3
+ import torch.nn.functional as F
4
+
5
+
6
+ all_times = []
7
+
8
+ class Timer(ContextDecorator):
9
+ def __init__(self, name, enabled=True):
10
+ self.name = name
11
+ self.enabled = enabled
12
+
13
+ if self.enabled:
14
+ self.start = torch.cuda.Event(enable_timing=True)
15
+ self.end = torch.cuda.Event(enable_timing=True)
16
+
17
+ def __enter__(self):
18
+ if self.enabled:
19
+ self.start.record()
20
+
21
+ def __exit__(self, type, value, traceback):
22
+ global all_times
23
+ if self.enabled:
24
+ self.end.record()
25
+ torch.cuda.synchronize()
26
+
27
+ elapsed = self.start.elapsed_time(self.end)
28
+ all_times.append(elapsed)
29
+ print(f"{self.name} {elapsed:.03f}")
30
+
31
+
32
+ def coords_grid(b, n, h, w, **kwargs):
33
+ """ coordinate grid """
34
+ x = torch.arange(0, w, dtype=torch.float, **kwargs)
35
+ y = torch.arange(0, h, dtype=torch.float, **kwargs)
36
+ coords = torch.stack(torch.meshgrid(y, x, indexing="ij"))
37
+ return coords[[1,0]].view(1, 1, 2, h, w).repeat(b, n, 1, 1, 1)
38
+
39
+ def coords_grid_with_index(d, **kwargs):
40
+ """ coordinate grid with frame index"""
41
+ b, n, h, w = d.shape
42
+ i = torch.ones_like(d)
43
+ x = torch.arange(0, w, dtype=torch.float, **kwargs)
44
+ y = torch.arange(0, h, dtype=torch.float, **kwargs)
45
+
46
+ y, x = torch.stack(torch.meshgrid(y, x, indexing="ij"))
47
+ y = y.view(1, 1, h, w).repeat(b, n, 1, 1)
48
+ x = x.view(1, 1, h, w).repeat(b, n, 1, 1)
49
+
50
+ coords = torch.stack([x, y, d], dim=2)
51
+ index = torch.arange(0, n, dtype=torch.float, **kwargs)
52
+ index = index.view(1, n, 1, 1, 1).repeat(b, 1, 1, h, w)
53
+
54
+ return coords, index
55
+
56
+ def patchify(x, patch_size=3):
57
+ """ extract patches from video """
58
+ b, n, c, h, w = x.shape
59
+ x = x.view(b*n, c, h, w)
60
+ y = F.unfold(x, patch_size)
61
+ y = y.transpose(1,2)
62
+ return y.reshape(b, -1, c, patch_size, patch_size)
63
+
64
+
65
+ def pyramidify(fmap, lvls=[1]):
66
+ """ turn fmap into a pyramid """
67
+ b, n, c, h, w = fmap.shape
68
+
69
+ pyramid = []
70
+ for lvl in lvls:
71
+ gmap = F.avg_pool2d(fmap.view(b*n, c, h, w), lvl, stride=lvl)
72
+ pyramid += [ gmap.view(b, n, c, h//lvl, w//lvl) ]
73
+
74
+ return pyramid
75
+
76
+ def all_pairs_exclusive(n, **kwargs):
77
+ ii, jj = torch.meshgrid(torch.arange(n, **kwargs), torch.arange(n, **kwargs))
78
+ k = ii != jj
79
+ return ii[k].reshape(-1), jj[k].reshape(-1)
80
+
81
+ def set_depth(patches, depth):
82
+ patches[...,2,:,:] = depth[...,None,None]
83
+ return patches
84
+
85
+ def flatmeshgrid(*args, **kwargs):
86
+ grid = torch.meshgrid(*args, **kwargs)
87
+ return (x.reshape(-1) for x in grid)
88
+
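A small CPU-only sketch of the patch helpers above on a dummy video tensor (the sizes are toy values; assumes the dpvo package is importable):

import torch
from dpvo.utils import patchify, set_depth

video = torch.randn(1, 2, 3, 8, 8)          # (batch, frames, channels, height, width)
patches = patchify(video, patch_size=3)     # one 3x3 patch per valid pixel location
print(patches.shape)                        # torch.Size([1, 72, 3, 3, 3]) = (1, 2*6*6, 3, 3, 3)

depth = torch.rand(patches.shape[:2])       # one value per patch
patches = set_depth(patches, depth)         # written into channel 2, where DPVO keeps inverse depth
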
third-party/DPVO/environment.yml ADDED
@@ -0,0 +1,26 @@
1
+ name: dpvo
2
+ channels:
3
+ - pyg
4
+ - pytorch
5
+ - nvidia
6
+ - conda-forge
7
+ - defaults
8
+ dependencies:
9
+ - pip
10
+ - python=3.10
11
+ - pytorch=2.3.1
12
+ - pytorch-scatter=2.1.2
13
+ - pytorch-cuda=12.1
14
+ - torchvision=0.18
15
+ - pip:
16
+ - tensorboard
17
+ - numba
18
+ - tqdm
19
+ - einops
20
+ - pypose
21
+ - kornia
22
+ - numpy==1.26.4
23
+ - plyfile
24
+ - evo
25
+ - opencv-python
26
+ - yacs
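After building the environment from this file, a quick Python sanity check (a suggestion, not part of the repo) to confirm the pinned stack resolved correctly:

import cv2, einops, evo, torch, torch_scatter, yacs

print(torch.__version__)             # expected to start with 2.3.1 per the pin above
print(torch.cuda.is_available())     # DPVO's CUDA extensions require a working GPU setup
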
third-party/DPVO/evaluate_euroc.py ADDED
@@ -0,0 +1,145 @@
1
+ import glob
2
+ import os
3
+ from multiprocessing import Process, Queue
4
+ from pathlib import Path
5
+
6
+ import cv2
7
+ import evo.main_ape as main_ape
8
+ import numpy as np
9
+ import torch
10
+ from evo.core import sync
11
+ from evo.core.metrics import PoseRelation
12
+ from evo.core.trajectory import PoseTrajectory3D
13
+ from evo.tools import file_interface
14
+
15
+ from dpvo.config import cfg
16
+ from dpvo.dpvo import DPVO
17
+ from dpvo.plot_utils import plot_trajectory
18
+ from dpvo.stream import image_stream
19
+ from dpvo.utils import Timer
20
+
21
+ SKIP = 0
22
+
23
+ def show_image(image, t=0):
24
+ image = image.permute(1, 2, 0).cpu().numpy()
25
+ cv2.imshow('image', image / 255.0)
26
+ cv2.waitKey(t)
27
+
28
+ @torch.no_grad()
29
+ def run(cfg, network, imagedir, calib, stride=1, viz=False, show_img=False):
30
+
31
+ slam = None
32
+
33
+ queue = Queue(maxsize=8)
34
+ reader = Process(target=image_stream, args=(queue, imagedir, calib, stride, 0))
35
+ reader.start()
36
+
37
+ while 1:
38
+ (t, image, intrinsics) = queue.get()
39
+ if t < 0: break
40
+
41
+ image = torch.from_numpy(image).permute(2,0,1).cuda()
42
+ intrinsics = torch.from_numpy(intrinsics).cuda()
43
+
44
+ if show_img:
45
+ show_image(image, 1)
46
+
47
+ if slam is None:
48
+ slam = DPVO(cfg, network, ht=image.shape[1], wd=image.shape[2], viz=viz)
49
+
50
+ with Timer("SLAM", enabled=False):
51
+ slam(t, image, intrinsics)
52
+
53
+ reader.join()
54
+
55
+ return slam.terminate()
56
+
57
+
58
+ if __name__ == '__main__':
59
+ import argparse
60
+ parser = argparse.ArgumentParser()
61
+ parser.add_argument('--network', type=str, default='dpvo.pth')
62
+ parser.add_argument('--config', default="config/default.yaml")
63
+ parser.add_argument('--stride', type=int, default=2)
64
+ parser.add_argument('--viz', action="store_true")
65
+ parser.add_argument('--show_img', action="store_true")
66
+ parser.add_argument('--trials', type=int, default=1)
67
+ parser.add_argument('--eurocdir', default="datasets/EUROC")
68
+ parser.add_argument('--backend_thresh', type=float, default=64.0)
69
+ parser.add_argument('--plot', action="store_true")
70
+ parser.add_argument('--opts', nargs='+', default=[])
71
+ parser.add_argument('--save_trajectory', action="store_true")
72
+ args = parser.parse_args()
73
+
74
+ cfg.merge_from_file(args.config)
75
+ cfg.BACKEND_THRESH = args.backend_thresh
76
+ cfg.merge_from_list(args.opts)
77
+
78
+ print("\nRunning with config...")
79
+ print(cfg, "\n")
80
+
81
+ torch.manual_seed(1234)
82
+
83
+ euroc_scenes = [
84
+ "MH_01_easy",
85
+ "MH_02_easy",
86
+ "MH_03_medium",
87
+ "MH_04_difficult",
88
+ "MH_05_difficult",
89
+ "V1_01_easy",
90
+ "V1_02_medium",
91
+ "V1_03_difficult",
92
+ "V2_01_easy",
93
+ "V2_02_medium",
94
+ "V2_03_difficult",
95
+ ]
96
+
97
+ results = {}
98
+ for scene in euroc_scenes:
99
+ imagedir = os.path.join(args.eurocdir, scene, "mav0/cam0/data")
100
+ groundtruth = "datasets/euroc_groundtruth/{}.txt".format(scene)
101
+
102
+ scene_results = []
103
+ for i in range(args.trials):
104
+ traj_est, timestamps = run(cfg, args.network, imagedir, "calib/euroc.txt", args.stride, args.viz, args.show_img)
105
+
106
+ images_list = sorted(glob.glob(os.path.join(imagedir, "*.png")))[::args.stride]
107
+ tstamps = [float(x.split('/')[-1][:-4]) for x in images_list]
108
+
109
+ traj_est = PoseTrajectory3D(
110
+ positions_xyz=traj_est[:,:3],
111
+ orientations_quat_wxyz=traj_est[:, [6, 3, 4, 5]],
112
+ timestamps=np.array(tstamps))
113
+
114
+ traj_ref = file_interface.read_tum_trajectory_file(groundtruth)
115
+ traj_ref, traj_est = sync.associate_trajectories(traj_ref, traj_est)
116
+
117
+ result = main_ape.ape(traj_ref, traj_est, est_name='traj',
118
+ pose_relation=PoseRelation.translation_part, align=True, correct_scale=True)
119
+ ate_score = result.stats["rmse"]
120
+
121
+ if args.plot:
122
+ scene_name = '_'.join(scene.split('/')[1:]).title()
123
+ Path("trajectory_plots").mkdir(exist_ok=True)
124
+ plot_trajectory(traj_est, traj_ref, f"Euroc {scene} Trial #{i+1} (ATE: {ate_score:.03f})",
125
+ f"trajectory_plots/Euroc_{scene}_Trial{i+1:02d}.pdf", align=True, correct_scale=True)
126
+
127
+ if args.save_trajectory:
128
+ Path("saved_trajectories").mkdir(exist_ok=True)
129
+ file_interface.write_tum_trajectory_file(f"saved_trajectories/Euroc_{scene}_Trial{i+1:02d}.txt", traj_est)
130
+
131
+ scene_results.append(ate_score)
132
+
133
+ results[scene] = np.median(scene_results)
134
+ print(scene, sorted(scene_results))
135
+
136
+ xs = []
137
+ for scene in results:
138
+ print(scene, results[scene])
139
+ xs.append(results[scene])
140
+
141
+ print("AVG: ", np.mean(xs))
142
+
143
+
144
+
145
+
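The ATE number reported per scene above comes from evo's APE with trajectory alignment and scale correction. A self-contained sketch of that metric on synthetic trajectories (the trajectories and noise level are made up), useful for checking the evo installation without running the full pipeline:

import numpy as np
import evo.main_ape as main_ape
from evo.core.metrics import PoseRelation
from evo.core.trajectory import PoseTrajectory3D

T = 100
tstamps = np.arange(T, dtype=np.float64)
quats = np.tile([1.0, 0.0, 0.0, 0.0], (T, 1))       # identity orientations (wxyz)

gt_xyz = np.stack([tstamps, np.sin(0.1 * tstamps), np.zeros(T)], axis=1)
est_xyz = gt_xyz + np.random.normal(scale=0.01, size=gt_xyz.shape)   # noisy "estimate"

traj_ref = PoseTrajectory3D(positions_xyz=gt_xyz, orientations_quat_wxyz=quats, timestamps=tstamps)
traj_est = PoseTrajectory3D(positions_xyz=est_xyz, orientations_quat_wxyz=quats, timestamps=tstamps)

result = main_ape.ape(traj_ref, traj_est, pose_relation=PoseRelation.translation_part,
                      align=True, correct_scale=True)
print(result.stats["rmse"])                         # ATE RMSE, as reported per EuRoC scene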