Spaces:
Running
Running
// Copyright 2019 Yan Yan | |
// | |
// Licensed under the Apache License, Version 2.0 (the "License"); | |
// you may not use this file except in compliance with the License. | |
// You may obtain a copy of the License at | |
// | |
// http://www.apache.org/licenses/LICENSE-2.0 | |
// | |
// Unless required by applicable law or agreed to in writing, software | |
// distributed under the License is distributed on an "AS IS" BASIS, | |
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
// See the License for the specific language governing permissions and | |
// limitations under the License. | |
template <typename Index, typename IndexGrid, unsigned NDim, | |
int KernelMaxVolume = 256> | |
__global__ void prepareIndicePairsKernel( | |
tv::TensorView<const Index> indicesIn, tv::TensorView<Index> indicesOut, | |
tv::TensorView<IndexGrid> gridsOut, tv::TensorView<Index> indicePairs, | |
tv::TensorView<Index> indiceNum, tv::TensorView<Index> indicePairUnique, | |
const tv::SimpleVector<Index, NDim> kernelSize, | |
const tv::SimpleVector<Index, NDim> stride, | |
const tv::SimpleVector<Index, NDim> padding, | |
const tv::SimpleVector<Index, NDim> dilation, | |
const tv::SimpleVector<Index, NDim> outSpatialShape) { | |
auto numActIn = indicesIn.dim(0); | |
Index spatialVolume = 1; | |
for (int i = 0; i < NDim; ++i) { | |
spatialVolume *= outSpatialShape[i]; | |
} | |
Index kernelVolume = 1; | |
for (int i = 0; i < NDim; ++i) { | |
kernelVolume *= kernelSize[i]; | |
} | |
Index numValidPoints = 0; | |
Index validPoints[KernelMaxVolume * (NDim + 1)]; | |
Index *pointPtr = nullptr; | |
auto indicePairsDim2 = indicePairs.dim(2); | |
Index index; | |
for (int ix : tv::KernelLoopX<int>(numActIn)) { | |
numValidPoints = getValidOutPos<Index, NDim>( | |
indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(), | |
stride.data(), padding.data(), dilation.data(), outSpatialShape.data(), | |
validPoints); | |
for (Index i = 0; i < numValidPoints; ++i) { | |
pointPtr = validPoints + i * (NDim + 1); | |
auto offset = pointPtr[NDim]; | |
auto oldNum = atomicAdd(indiceNum.data() + offset, Index(1)); | |
indicePairs(offset, 0, oldNum) = ix; | |
index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape.data()) + | |
spatialVolume * indicesIn(ix, 0); | |
indicePairs(offset, 1, oldNum) = index; | |
indicePairUnique[offset * indicePairsDim2 + oldNum] = index; | |
} | |
} | |
} | |
template <typename Index, typename IndexGrid, unsigned NDim, | |
int KernelMaxVolume = 256> | |
__global__ void prepareDeConvIndicePairsKernel( | |
tv::TensorView<const Index> indicesIn, tv::TensorView<Index> indicesOut, | |
tv::TensorView<IndexGrid> gridsOut, tv::TensorView<Index> indicePairs, | |
tv::TensorView<Index> indiceNum, tv::TensorView<Index> indicePairUnique, | |
const tv::SimpleVector<Index, NDim> kernelSize, | |
const tv::SimpleVector<Index, NDim> stride, | |
const tv::SimpleVector<Index, NDim> padding, | |
const tv::SimpleVector<Index, NDim> dilation, | |
const tv::SimpleVector<Index, NDim> outSpatialShape) { | |
auto numActIn = indicesIn.dim(0); | |
Index spatialVolume = 1; | |
for (int i = 0; i < NDim; ++i) { | |
spatialVolume *= outSpatialShape[i]; | |
} | |
Index kernelVolume = 1; | |
for (int i = 0; i < NDim; ++i) { | |
kernelVolume *= kernelSize[i]; | |
} | |
Index numValidPoints = 0; | |
Index validPoints[KernelMaxVolume * (NDim + 1)]; | |
Index *pointPtr = nullptr; | |
auto indicePairsDim2 = indicePairs.dim(2); | |
Index index; | |
for (int ix : tv::KernelLoopX<int>(numActIn)) { | |
numValidPoints = getValidOutPosTranspose<Index, NDim>( | |
indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(), | |
stride.data(), padding.data(), dilation.data(), outSpatialShape.data(), | |
validPoints); | |
for (Index i = 0; i < numValidPoints; ++i) { | |
pointPtr = validPoints + i * (NDim + 1); | |
auto offset = pointPtr[NDim]; | |
auto oldNum = atomicAdd(indiceNum.data() + offset, Index(1)); | |
indicePairs(offset, 0, oldNum) = ix; | |
index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape.data()) + | |
spatialVolume * indicesIn(ix, 0); | |
indicePairs(offset, 1, oldNum) = index; | |
indicePairUnique[offset * indicePairsDim2 + oldNum] = index; | |
} | |
} | |
} | |
template <typename Index, typename IndexGrid, unsigned NDim> | |
__global__ void assignGridAndIndiceOutKernel( | |
tv::TensorView<Index> indicesOut, tv::TensorView<IndexGrid> gridsOut, | |
int numAct, tv::TensorView<Index> indicePairs, | |
tv::TensorView<Index> indicePairUnique, | |
const tv::SimpleVector<Index, NDim> outSpatialShape, int batchSize) { | |
Index index; | |
auto indicesOutPtr = indicesOut.data(); | |
for (int ix : tv::KernelLoopX<int>(numAct)) { | |
index = indicePairUnique[ix]; | |
gridsOut[index] = ix; | |
index = tv::rowArrayIdxInv<Index, NDim>( | |
index, indicesOutPtr + ix * (NDim + 1) + 1, outSpatialShape.data()); | |
indicesOut[ix * (NDim + 1)] = index % batchSize; | |
} | |
} | |
template <typename Index, typename IndexGrid, unsigned NDim> | |
__global__ void assignIndicePairsKernel( | |
tv::TensorView<Index> indicesOut, tv::TensorView<IndexGrid> gridsOut, | |
int numActIn, tv::TensorView<Index> indicePairs, | |
tv::TensorView<Index> indicePairUnique, | |
const tv::SimpleVector<Index, NDim> outSpatialShape) { | |
Index index; | |
int kernelVolume = indicePairs.dim(0); | |
for (int ix : tv::KernelLoopX<int>(numActIn)) { | |
for (int i = 0; i < kernelVolume; ++i) { | |
index = indicePairs(i, 1, ix); | |
if (index > -1) { | |
indicePairs(i, 1, ix) = gridsOut[index]; | |
} | |
} | |
} | |
} | |
template <typename Index, typename IndexGrid, unsigned NDim> | |
__global__ void prepareSubMGridKernel( | |
tv::TensorView<const Index> indicesIn, tv::TensorView<IndexGrid> gridsOut, | |
const tv::SimpleVector<Index, NDim> outSpatialShape) { | |
auto numActIn = indicesIn.dim(0); | |
Index spatialVolume = 1; | |
for (int i = 0; i < NDim; ++i) { | |
spatialVolume *= outSpatialShape[i]; | |
} | |
Index index = 0; | |
for (int ix : tv::KernelLoopX<int>(numActIn)) { | |
index = tv::rowArrayIdx<Index, NDim>(indicesIn.data() + ix * (NDim + 1) + 1, | |
outSpatialShape.data()) + | |
spatialVolume * indicesIn(ix, 0); | |
gridsOut[index] = ix; | |
} | |
} | |
template <typename Index, typename IndexGrid, unsigned NDim, | |
int KernelMaxVolume = 256> | |
__global__ void getSubMIndicePairsKernel( | |
tv::TensorView<const Index> indicesIn, tv::TensorView<IndexGrid> gridsOut, | |
tv::TensorView<Index> indicePairs, tv::TensorView<Index> indiceNum, | |
const tv::SimpleVector<Index, NDim> kernelSize, | |
const tv::SimpleVector<Index, NDim> stride, | |
const tv::SimpleVector<Index, NDim> padding, | |
const tv::SimpleVector<Index, NDim> dilation, | |
const tv::SimpleVector<Index, NDim> outSpatialShape) { | |
auto numActIn = indicesIn.dim(0); | |
Index spatialVolume = 1; | |
for (int i = 0; i < NDim; ++i) { | |
spatialVolume *= outSpatialShape[i]; | |
} | |
Index numValidPoints = 0; | |
Index validPoints[KernelMaxVolume * (NDim + 1)]; | |
Index *pointPtr = nullptr; | |
Index index = 0; | |
for (int ix : tv::KernelLoopX<int>(numActIn)) { | |
numValidPoints = getValidOutPos<Index, NDim>( | |
indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(), | |
stride.data(), padding.data(), dilation.data(), outSpatialShape.data(), | |
validPoints); | |
for (int i = 0; i < numValidPoints; ++i) { | |
pointPtr = validPoints + i * (NDim + 1); | |
auto offset = pointPtr[NDim]; | |
index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape.data()) + | |
spatialVolume * indicesIn(ix, 0); | |
if (gridsOut[index] > -1) { | |
auto oldNum = atomicAdd(indiceNum.data() + offset, Index(1)); | |
indicePairs(offset, 1, oldNum) = gridsOut[index]; | |
indicePairs(offset, 0, oldNum) = ix; | |
} | |
} | |
} | |
} | |
template <typename Index, typename IndexGrid, unsigned NDim> | |
__global__ void resetGridKernel(const Index *indicePairUnique, | |
tv::TensorView<IndexGrid> gridsOut, | |
int numAct) { | |
for (int ix : tv::KernelLoopX<int>(numAct)) { | |
gridsOut[indicePairUnique[ix]] = -1; | |
} | |
} | |
template <typename Index, typename IndexGrid, unsigned NDim> | |
__global__ void resetGridSubMKernel( | |
const Index *indices, tv::TensorView<IndexGrid> gridsOut, | |
const tv::SimpleVector<Index, NDim> outSpatialShape, int numAct) { | |
int outSpatialShapeReg[NDim]; | |
for (int i = 0; i < NDim; ++i) { | |
outSpatialShapeReg[i] = outSpatialShape[i]; | |
} | |
Index spatialVolume = 1; | |
auto indsPtr = indices; | |
for (int i = 0; i < NDim; ++i) { | |
spatialVolume *= outSpatialShape[i]; | |
} | |
Index index; | |
for (int ix : tv::KernelLoopX<int>(numAct)) { | |
indsPtr = indices + ix * (NDim + 1); | |
index = tv::rowArrayIdx<Index, NDim>(indsPtr + 1, outSpatialShapeReg); | |
gridsOut[index + spatialVolume * indsPtr[0]] = -1; | |
} | |
} | |