File size: 5,603 Bytes
20ae9ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#include <torch/extension.h>
#include <vector>
#include <unordered_map>
#include <algorithm>
#include <iostream>
#include <Eigen/Core>
#include <Eigen/Sparse>


std::vector<torch::Tensor> cuda_ba(

    torch::Tensor poses,

    torch::Tensor patches,

    torch::Tensor intrinsics,

    torch::Tensor target,

    torch::Tensor weight,

    torch::Tensor lmbda,

    torch::Tensor ii,

    torch::Tensor jj, 

    torch::Tensor kk,

    const int PPF,

    int t0, int t1, int iterations, bool eff_impl);


torch::Tensor cuda_reproject(

    torch::Tensor poses,

    torch::Tensor patches,

    torch::Tensor intrinsics,

    torch::Tensor ii,

    torch::Tensor jj, 

    torch::Tensor kk);

std::vector<torch::Tensor> ba(

    torch::Tensor poses,

    torch::Tensor patches,

    torch::Tensor intrinsics,

    torch::Tensor target,

    torch::Tensor weight,

    torch::Tensor lmbda,

    torch::Tensor ii,

    torch::Tensor jj, 

    torch::Tensor kk,

    int PPF,

    int t0, int t1, int iterations, bool eff_impl) {
  return cuda_ba(poses, patches, intrinsics, target, weight, lmbda, ii, jj, kk, PPF, t0, t1, iterations, eff_impl);
}


torch::Tensor reproject(

    torch::Tensor poses,

    torch::Tensor patches,

    torch::Tensor intrinsics,

    torch::Tensor ii,

    torch::Tensor jj, 

    torch::Tensor kk) {
  return cuda_reproject(poses, patches, intrinsics, ii, jj, kk);
}


std::vector<torch::Tensor> neighbors(torch::Tensor ii, torch::Tensor jj)

{

  auto tup = torch::_unique(ii, true, true);
  torch::Tensor uniq = std::get<0>(tup).to(torch::kCPU);
  torch::Tensor perm = std::get<1>(tup).to(torch::kCPU);

  jj = jj.to(torch::kCPU);
  auto jj_accessor = jj.accessor<long,1>();

  auto perm_accessor = perm.accessor<long,1>();
  std::vector<std::vector<long>> index(uniq.size(0));
  for (int i=0; i < ii.size(0); i++) {
    index[perm_accessor[i]].push_back(i);
  }

  auto opts = torch::TensorOptions().dtype(torch::kInt64);
  torch::Tensor ix = torch::empty({ii.size(0)}, opts);
  torch::Tensor jx = torch::empty({ii.size(0)}, opts); 

  auto ix_accessor = ix.accessor<long,1>();
  auto jx_accessor = jx.accessor<long,1>();

  for (int i=0; i<uniq.size(0); i++) {
    std::vector<long>& idx = index[i];
    std::stable_sort(idx.begin(), idx.end(),
       [&jj_accessor](size_t i, size_t j) {return jj_accessor[i] < jj_accessor[j];});

    for (int i=0; i < idx.size(); i++) {
      ix_accessor[idx[i]] = (i > 0) ? idx[i-1] : -1;
      jx_accessor[idx[i]] = (i < idx.size() - 1) ? idx[i+1] : -1;
    }
  }

  ix = ix.to(torch::kCUDA);
  jx = jx.to(torch::kCUDA);

  return {ix, jx};
}

typedef Eigen::SparseMatrix<double> SpMat;
typedef Eigen::Triplet<double> T;

Eigen::VectorXd solve(const SpMat &A, const Eigen::VectorXd &b, int freen){

  if (freen < 0){
    const Eigen::SimplicialCholesky<SpMat> chol(A);
    return chol.solve(b); // n x 1
  }
  
  const SpMat A_sub = A.topLeftCorner(freen, freen);
  const Eigen::VectorXd b_sub = b.topRows(freen);
  const Eigen::VectorXd delta = solve(A_sub, b_sub, -7);

  Eigen::VectorXd delta2(b.rows());
  delta2.setZero();
  delta2.topRows(freen) = delta;

  return delta2;
}

std::vector<torch::Tensor> solve_system(torch::Tensor J_Ginv_i, torch::Tensor J_Ginv_j, torch::Tensor ii, torch::Tensor jj, torch::Tensor res, float ep, float lm, int freen)

{

  const torch::Device device = res.device();
  J_Ginv_i = J_Ginv_i.to(torch::kCPU);
  J_Ginv_j = J_Ginv_j.to(torch::kCPU);
  ii = ii.to(torch::kCPU);
  jj = jj.to(torch::kCPU);
  res = res.clone().to(torch::kCPU);

  const int r = res.size(0);
  const int n = std::max(ii.max().item<long>(), jj.max().item<long>()) + 1;

  res.resize_({r*7});
  float *res_ptr = res.data_ptr<float>();
  Eigen::Map<Eigen::VectorXf> v(res_ptr, r*7);

  SpMat J(r*7, n*7);
  std::vector<T> tripletList;
  tripletList.reserve(r*7*7*2);

  auto ii_acc = ii.accessor<long,1>();
  auto jj_acc = jj.accessor<long,1>();
  auto J_Ginv_i_acc = J_Ginv_i.accessor<float,3>();
  auto J_Ginv_j_acc = J_Ginv_j.accessor<float,3>();

  for (int x=0; x<r; x++){
    const int i = ii_acc[x];
    const int j = jj_acc[x];
    for (int k=0; k<7; k++){
      for (int l=0; l<7; l++){
        if (i == j)
          exit(1);
        const float val_i = J_Ginv_i_acc[x][k][l];
        tripletList.emplace_back(x*7 + k, i*7 + l, val_i);
        const float val_j = J_Ginv_j_acc[x][k][l];
        tripletList.emplace_back(x*7 + k, j*7 + l, val_j);
      }
    }
  }

  J.setFromTriplets(tripletList.begin(), tripletList.end());
  const SpMat Jt = J.transpose();
  Eigen::VectorXd b = -(Jt * v.cast<double>());
  SpMat A = Jt * J;

  A.diagonal() += (A.diagonal() * lm);
  A.diagonal().array() += ep;
  Eigen::VectorXf delta = solve(A, b, freen*7).cast<float>();

  torch::Tensor delta_tensor = torch::from_blob(delta.data(), {n*7}).clone().to(device);
  delta_tensor.resize_({n, 7});
  return {delta_tensor};

  Eigen::Matrix<float, -1, -1, Eigen::RowMajor> dense_J(J.cast<float>());
  torch::Tensor dense_J_tensor = torch::from_blob(dense_J.data(), {r*7, n*7}).clone().to(device);
  dense_J_tensor.resize_({r, 7, n, 7});

  return {delta_tensor, dense_J_tensor};

}


PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &ba, "BA forward operator");
  m.def("neighbors", &neighbors, "temporal neighboor indicies");
  m.def("reproject", &reproject, "temporal neighboor indicies");
  m.def("solve_system", &solve_system, "temporal neighboor indicies");

}