Spaces:
Running
Running
// Copyright (c) OpenMMLab. All rights reserved. | |
template <typename T, typename T_int> | |
void dynamic_voxelize_forward_cpu_kernel( | |
const torch::TensorAccessor<T, 2> points, | |
torch::TensorAccessor<T_int, 2> coors, const std::vector<float> voxel_size, | |
const std::vector<float> coors_range, const std::vector<int> grid_size, | |
const int num_points, const int num_features, const int NDim) { | |
const int ndim_minus_1 = NDim - 1; | |
bool failed = false; | |
// int coor[NDim]; | |
int* coor = new int[NDim](); | |
int c; | |
for (int i = 0; i < num_points; ++i) { | |
failed = false; | |
for (int j = 0; j < NDim; ++j) { | |
c = floor((points[i][j] - coors_range[j]) / voxel_size[j]); | |
// necessary to rm points out of range | |
if ((c < 0 || c >= grid_size[j])) { | |
failed = true; | |
break; | |
} | |
coor[ndim_minus_1 - j] = c; | |
} | |
// memcpy and memset will cause problem because of the memory distribution | |
// discontinuity of TensorAccessor, so here using loops to replace memcpy | |
// or memset | |
if (failed) { | |
for (int k = 0; k < NDim; ++k) { | |
coors[i][k] = -1; | |
} | |
} else { | |
for (int k = 0; k < NDim; ++k) { | |
coors[i][k] = coor[k]; | |
} | |
} | |
} | |
delete[] coor; | |
return; | |
} | |
// Hard voxelization: group points into voxels subject to capacity limits.
//
// First runs dynamic voxelization to obtain every point's (reversed) voxel
// coordinate, then assigns points to voxels in input order, capping the
// number of voxels at `max_voxels` and the points per voxel at `max_points`
// (either limit may be -1 for "unlimited").  Points whose coordinate is out
// of range, or that would exceed a limit, are silently dropped.
//
// Outputs:
//   voxels               (max_voxels, max_points, num_features) point data
//                        copied per voxel.
//   coors                (max_voxels, NDim) coordinate of each created voxel.
//   num_points_per_voxel (max_voxels,) fill count of each voxel.
//   coor_to_voxelidx     dense coord -> voxel index map; caller must
//                        pre-fill with -1 ("no voxel yet").  Indexed with
//                        exactly three coordinates, so this kernel assumes
//                        NDim == 3 regardless of the NDim argument.
//   voxel_num            in/out running count of voxels created so far.
template <typename T, typename T_int>
void hard_voxelize_forward_cpu_kernel(
    const torch::TensorAccessor<T, 2> points,
    torch::TensorAccessor<T, 3> voxels, torch::TensorAccessor<T_int, 2> coors,
    torch::TensorAccessor<T_int, 1> num_points_per_voxel,
    torch::TensorAccessor<T_int, 3> coor_to_voxelidx, int& voxel_num,
    const std::vector<float> voxel_size, const std::vector<float> coors_range,
    const std::vector<int> grid_size, const int max_points,
    const int max_voxels, const int num_points, const int num_features,
    const int NDim) {
  // declare a temp coors
  at::Tensor temp_coors = at::zeros(
      {num_points, NDim}, at::TensorOptions().dtype(at::kInt).device(at::kCPU));
  // First use dynamic voxelization to get coors,
  // then check max points/voxels constraints
  dynamic_voxelize_forward_cpu_kernel<T, int>(
      points, temp_coors.accessor<int, 2>(), voxel_size, coors_range, grid_size,
      num_points, num_features, NDim);
  int voxelidx, num;
  auto coor = temp_coors.accessor<int, 2>();
  for (int i = 0; i < num_points; ++i) {
    // T_int* coor = temp_coors.data_ptr<int>() + i * NDim;
    // A leading -1 marks an out-of-range point; skip it.
    if (coor[i][0] == -1) continue;
    voxelidx = coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]];
    // record voxel: -1 means no voxel exists yet at this coordinate.
    if (voxelidx == -1) {
      voxelidx = voxel_num;
      // Voxel budget exhausted: drop the point without creating a voxel.
      if (max_voxels != -1 && voxel_num >= max_voxels) continue;
      voxel_num += 1;
      coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]] = voxelidx;
      // memcpy will cause problem because of the memory distribution
      // discontinuity of TensorAccessor, so here using loops to replace memcpy
      for (int k = 0; k < NDim; ++k) {
        coors[voxelidx][k] = coor[i][k];
      }
    }
    // put points into voxel (only if it still has free slots)
    num = num_points_per_voxel[voxelidx];
    if (max_points == -1 || num < max_points) {
      // memcpy will cause problem because of the memory distribution
      // discontinuity of TensorAccessor, so here using loops to replace memcpy
      for (int k = 0; k < num_features; ++k) {
        voxels[voxelidx][num][k] = points[i][k];
      }
      num_points_per_voxel[voxelidx] += 1;
    }
  }
  return;
}
// Entry point for CPU dynamic voxelization: derives the grid shape from the
// point-cloud range and voxel size, then dispatches the typed kernel.
// `coors` receives each point's (reversed) integer voxel coordinate, with
// -1 marking out-of-range points.
void dynamic_voxelize_forward_cpu(const at::Tensor& points, at::Tensor& coors,
                                  const std::vector<float> voxel_size,
                                  const std::vector<float> coors_range,
                                  const int NDim = 3) {
  // check device
  AT_ASSERTM(points.device().is_cpu(), "points must be a CPU tensor");

  const int num_points = points.size(0);
  const int num_features = points.size(1);

  // Number of voxels along each axis = range extent / voxel edge length.
  std::vector<int> grid_size(NDim);
  for (int axis = 0; axis < NDim; ++axis) {
    const float extent = coors_range[NDim + axis] - coors_range[axis];
    grid_size[axis] = round(extent / voxel_size[axis]);
  }

  // coors, num_points_per_voxel, coor_to_voxelidx are int Tensor
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      points.scalar_type(), "dynamic_voxelize_forward_cpu_kernel", [&] {
        dynamic_voxelize_forward_cpu_kernel<scalar_t, int>(
            points.accessor<scalar_t, 2>(), coors.accessor<int, 2>(),
            voxel_size, coors_range, grid_size, num_points, num_features,
            NDim);
      });
}
// Entry point for CPU hard voxelization.  Builds the grid shape and a dense
// coord->voxel-index lookup table (initialized to -1), dispatches the typed
// kernel, and returns the number of voxels actually created.
// Current version takes about 0.02s-0.03s per frame on CPU.
int hard_voxelize_forward_cpu(const at::Tensor& points, at::Tensor& voxels,
                              at::Tensor& coors,
                              at::Tensor& num_points_per_voxel,
                              const std::vector<float> voxel_size,
                              const std::vector<float> coors_range,
                              const int max_points, const int max_voxels,
                              const int NDim = 3) {
  // check device
  AT_ASSERTM(points.device().is_cpu(), "points must be a CPU tensor");

  const int num_points = points.size(0);
  const int num_features = points.size(1);

  // Number of voxels along each axis = range extent / voxel edge length.
  std::vector<int> grid_size(NDim);
  for (int axis = 0; axis < NDim; ++axis) {
    const float extent = coors_range[NDim + axis] - coors_range[axis];
    grid_size[axis] = round(extent / voxel_size[axis]);
  }

  // Dense lookup table mapping a (reversed) voxel coordinate to its voxel
  // index; -1 everywhere means "no voxel created yet".  Note the reversed
  // axis order (z, y, x), matching the kernel's coordinate layout.
  at::Tensor coor_to_voxelidx =
      -at::ones({grid_size[2], grid_size[1], grid_size[0]}, coors.options());

  int voxel_num = 0;
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      points.scalar_type(), "hard_voxelize_forward_cpu_kernel", [&] {
        hard_voxelize_forward_cpu_kernel<scalar_t, int>(
            points.accessor<scalar_t, 2>(), voxels.accessor<scalar_t, 3>(),
            coors.accessor<int, 2>(), num_points_per_voxel.accessor<int, 1>(),
            coor_to_voxelidx.accessor<int, 3>(), voxel_num, voxel_size,
            coors_range, grid_size, max_points, max_voxels, num_points,
            num_features, NDim);
      });
  return voxel_num;
}
int hard_voxelize_forward_impl(const at::Tensor& points, at::Tensor& voxels, | |
at::Tensor& coors, | |
at::Tensor& num_points_per_voxel, | |
const std::vector<float> voxel_size, | |
const std::vector<float> coors_range, | |
const int max_points, const int max_voxels, | |
const int NDim); | |
void dynamic_voxelize_forward_impl(const at::Tensor& points, at::Tensor& coors, | |
const std::vector<float> voxel_size, | |
const std::vector<float> coors_range, | |
const int NDim); | |
REGISTER_DEVICE_IMPL(hard_voxelize_forward_impl, CPU, | |
hard_voxelize_forward_cpu); | |
REGISTER_DEVICE_IMPL(dynamic_voxelize_forward_impl, CPU, | |
dynamic_voxelize_forward_cpu); | |