LIVE / thrust /dependencies /cub /test /test_iterator.cu
Xu Ma
update
1c3c0d9
raw
history blame
25.6 kB
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/
/******************************************************************************
* Test of iterator utilities
******************************************************************************/
// Ensure printing of CUDA runtime errors to console
#define CUB_STDERR
#include <iterator>
#include <stdio.h>
#include <typeinfo>
#include <cub/iterator/arg_index_input_iterator.cuh>
#include <cub/iterator/cache_modified_input_iterator.cuh>
#include <cub/iterator/cache_modified_output_iterator.cuh>
#include <cub/iterator/constant_input_iterator.cuh>
#include <cub/iterator/counting_input_iterator.cuh>
#include <cub/iterator/tex_obj_input_iterator.cuh>
#include <cub/iterator/tex_ref_input_iterator.cuh>
#include <cub/iterator/transform_input_iterator.cuh>
#include <cub/util_type.cuh>
#include <cub/util_allocator.cuh>
#include "test_util.h"
#include <thrust/device_ptr.h>
#include <thrust/copy.h>
using namespace cub;
//---------------------------------------------------------------------
// Globals, constants and typedefs
//---------------------------------------------------------------------
bool g_verbose = false;
CachingDeviceAllocator g_allocator(true);
// Dispatch types
enum Backend
{
CUB, // CUB method
THRUST, // Thrust method
CDP, // GPU-based (dynamic parallelism) dispatch to CUB method
};
template <typename T>
struct TransformOp
{
// Increment transform
__host__ __device__ __forceinline__ T operator()(T input) const
{
T addend;
InitValue(INTEGER_SEED, addend, 1);
return input + addend;
}
};
struct SelectOp
{
template <typename T>
__host__ __device__ __forceinline__ bool operator()(T input)
{
return true;
}
};
//---------------------------------------------------------------------
// Test kernels
//---------------------------------------------------------------------
/**
* Test random access input iterator
*/
template <
typename InputIteratorT,
typename T>
__global__ void Kernel(
InputIteratorT d_in,
T *d_out,
InputIteratorT *d_itrs)
{
d_out[0] = *d_in; // Value at offset 0
d_out[1] = d_in[100]; // Value at offset 100
d_out[2] = *(d_in + 1000); // Value at offset 1000
d_out[3] = *(d_in + 10000); // Value at offset 10000
d_in++;
d_out[4] = d_in[0]; // Value at offset 1
d_in += 20;
d_out[5] = d_in[0]; // Value at offset 21
d_itrs[0] = d_in; // Iterator at offset 21
d_in -= 10;
d_out[6] = d_in[0]; // Value at offset 11;
d_in -= 11;
d_out[7] = d_in[0]; // Value at offset 0
d_itrs[1] = d_in; // Iterator at offset 0
}
//---------------------------------------------------------------------
// Host testing subroutines
//---------------------------------------------------------------------
/**
* Run iterator test on device
*/
template <
typename InputIteratorT,
typename T,
int TEST_VALUES>
void Test(
InputIteratorT d_in,
T (&h_reference)[TEST_VALUES])
{
// Allocate device arrays
T *d_out = NULL;
InputIteratorT *d_itrs = NULL;
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(T) * TEST_VALUES));
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_itrs, sizeof(InputIteratorT) * 2));
int compare;
// Run unguarded kernel
Kernel<<<1, 1>>>(d_in, d_out, d_itrs);
CubDebugExit(cudaPeekAtLastError());
CubDebugExit(cudaDeviceSynchronize());
// Check results
compare = CompareDeviceResults(h_reference, d_out, TEST_VALUES, g_verbose, g_verbose);
printf("\tValues: %s\n", (compare) ? "FAIL" : "PASS");
AssertEquals(0, compare);
// Check iterator at offset 21
InputIteratorT h_itr = d_in + 21;
compare = CompareDeviceResults(&h_itr, d_itrs, 1, g_verbose, g_verbose);
printf("\tIterators: %s\n", (compare) ? "FAIL" : "PASS");
AssertEquals(0, compare);
// Check iterator at offset 0
compare = CompareDeviceResults(&d_in, d_itrs + 1, 1, g_verbose, g_verbose);
printf("\tIterators: %s\n", (compare) ? "FAIL" : "PASS");
AssertEquals(0, compare);
// Cleanup
if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out));
if (d_itrs) CubDebugExit(g_allocator.DeviceFree(d_itrs));
}
/**
* Test constant iterator
*/
template <typename T>
void TestConstant(T base)
{
printf("\nTesting constant iterator on type %s (base: %lld)\n", typeid(T).name(), (unsigned long long) (base)); fflush(stdout);
//
// Test iterator manipulation in kernel
//
T h_reference[8] = {base, base, base, base, base, base, base, base};
ConstantInputIterator<T> d_itr(base);
Test(d_itr, h_reference);
#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer
//
// Test with thrust::copy_if()
//
int copy_items = 100;
T *h_copy = new T[copy_items];
T *d_copy = NULL;
for (int i = 0; i < copy_items; ++i)
h_copy[i] = d_itr[i];
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * copy_items));
thrust::device_ptr<T> d_copy_wrapper(d_copy);
thrust::copy_if(d_itr, d_itr + copy_items, d_copy_wrapper, SelectOp());
int compare = CompareDeviceResults(h_copy, d_copy, copy_items, g_verbose, g_verbose);
printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS");
AssertEquals(0, compare);
if (h_copy) delete[] h_copy;
if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy));
#endif // THRUST_VERSION
}
/**
* Test counting iterator
*/
template <typename T>
void TestCounting(T base)
{
printf("\nTesting counting iterator on type %s (base: %d) \n", typeid(T).name(), int(base)); fflush(stdout);
//
// Test iterator manipulation in kernel
//
// Initialize reference data
T h_reference[8];
h_reference[0] = base + 0; // Value at offset 0
h_reference[1] = base + 100; // Value at offset 100
h_reference[2] = base + 1000; // Value at offset 1000
h_reference[3] = base + 10000; // Value at offset 10000
h_reference[4] = base + 1; // Value at offset 1
h_reference[5] = base + 21; // Value at offset 21
h_reference[6] = base + 11; // Value at offset 11
h_reference[7] = base + 0; // Value at offset 0;
CountingInputIterator<T> d_itr(base);
Test(d_itr, h_reference);
#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer
//
// Test with thrust::copy_if()
//
unsigned long long max_items = ((1ull << ((sizeof(T) * 8) - 1)) - 1);
size_t copy_items = (size_t) CUB_MIN(max_items - base, 100); // potential issue with differencing overflows when T is a smaller type than can handle the offset
T *h_copy = new T[copy_items];
T *d_copy = NULL;
for (unsigned long long i = 0; i < copy_items; ++i)
h_copy[i] = d_itr[i];
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * copy_items));
thrust::device_ptr<T> d_copy_wrapper(d_copy);
thrust::copy_if(d_itr, d_itr + copy_items, d_copy_wrapper, SelectOp());
int compare = CompareDeviceResults(h_copy, d_copy, copy_items, g_verbose, g_verbose);
printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS");
AssertEquals(0, compare);
if (h_copy) delete[] h_copy;
if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy));
#endif // THRUST_VERSION
}
/**
* Test modified iterator
*/
template <typename T, typename CastT>
void TestModified()
{
printf("\nTesting cache-modified iterator on type %s\n", typeid(T).name()); fflush(stdout);
//
// Test iterator manipulation in kernel
//
constexpr int TEST_VALUES = 11000;
T *h_data = new T[TEST_VALUES];
for (int i = 0; i < TEST_VALUES; ++i)
{
RandomBits(h_data[i]);
}
// Allocate device arrays
T *d_data = NULL;
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES));
CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice));
// Initialize reference data
T h_reference[8];
h_reference[0] = h_data[0]; // Value at offset 0
h_reference[1] = h_data[100]; // Value at offset 100
h_reference[2] = h_data[1000]; // Value at offset 1000
h_reference[3] = h_data[10000]; // Value at offset 10000
h_reference[4] = h_data[1]; // Value at offset 1
h_reference[5] = h_data[21]; // Value at offset 21
h_reference[6] = h_data[11]; // Value at offset 11
h_reference[7] = h_data[0]; // Value at offset 0;
Test(CacheModifiedInputIterator<LOAD_DEFAULT, T>((CastT*) d_data), h_reference);
Test(CacheModifiedInputIterator<LOAD_CA, T>((CastT*) d_data), h_reference);
Test(CacheModifiedInputIterator<LOAD_CG, T>((CastT*) d_data), h_reference);
Test(CacheModifiedInputIterator<LOAD_CS, T>((CastT*) d_data), h_reference);
Test(CacheModifiedInputIterator<LOAD_CV, T>((CastT*) d_data), h_reference);
Test(CacheModifiedInputIterator<LOAD_LDG, T>((CastT*) d_data), h_reference);
Test(CacheModifiedInputIterator<LOAD_VOLATILE, T>((CastT*) d_data), h_reference);
#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer
//
// Test with thrust::copy_if()
//
T *d_copy = NULL;
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES));
CacheModifiedInputIterator<LOAD_CG, T> d_in_itr((CastT*) d_data);
CacheModifiedOutputIterator<STORE_CG, T> d_out_itr((CastT*) d_copy);
thrust::copy_if(d_in_itr, d_in_itr + TEST_VALUES, d_out_itr, SelectOp());
int compare = CompareDeviceResults(h_data, d_copy, TEST_VALUES, g_verbose, g_verbose);
printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS");
AssertEquals(0, compare);
// Cleanup
if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy));
#endif // THRUST_VERSION
if (h_data) delete[] h_data;
if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data));
}
/**
* Test transform iterator
*/
template <typename T, typename CastT>
void TestTransform()
{
printf("\nTesting transform iterator on type %s\n", typeid(T).name()); fflush(stdout);
//
// Test iterator manipulation in kernel
//
constexpr int TEST_VALUES = 11000;
T *h_data = new T[TEST_VALUES];
for (int i = 0; i < TEST_VALUES; ++i)
{
InitValue(INTEGER_SEED, h_data[i], i);
}
// Allocate device arrays
T *d_data = NULL;
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES));
CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice));
TransformOp<T> op;
// Initialize reference data
T h_reference[8];
h_reference[0] = op(h_data[0]); // Value at offset 0
h_reference[1] = op(h_data[100]); // Value at offset 100
h_reference[2] = op(h_data[1000]); // Value at offset 1000
h_reference[3] = op(h_data[10000]); // Value at offset 10000
h_reference[4] = op(h_data[1]); // Value at offset 1
h_reference[5] = op(h_data[21]); // Value at offset 21
h_reference[6] = op(h_data[11]); // Value at offset 11
h_reference[7] = op(h_data[0]); // Value at offset 0;
TransformInputIterator<T, TransformOp<T>, CastT*> d_itr((CastT*) d_data, op);
Test(d_itr, h_reference);
#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer
//
// Test with thrust::copy_if()
//
T *h_copy = new T[TEST_VALUES];
for (int i = 0; i < TEST_VALUES; ++i)
h_copy[i] = op(h_data[i]);
T *d_copy = NULL;
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES));
thrust::device_ptr<T> d_copy_wrapper(d_copy);
thrust::copy_if(d_itr, d_itr + TEST_VALUES, d_copy_wrapper, SelectOp());
int compare = CompareDeviceResults(h_copy, d_copy, TEST_VALUES, g_verbose, g_verbose);
printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS");
AssertEquals(0, compare);
// Cleanup
if (h_copy) delete[] h_copy;
if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy));
#endif // THRUST_VERSION
if (h_data) delete[] h_data;
if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data));
}
/**
* Test tex-obj texture iterator
*/
template <typename T, typename CastT>
void TestTexObj()
{
printf("\nTesting tex-obj iterator on type %s\n", typeid(T).name()); fflush(stdout);
//
// Test iterator manipulation in kernel
//
const unsigned int TEST_VALUES = 11000;
const unsigned int DUMMY_OFFSET = 500;
const unsigned int DUMMY_TEST_VALUES = TEST_VALUES - DUMMY_OFFSET;
T *h_data = new T[TEST_VALUES];
for (int i = 0; i < TEST_VALUES; ++i)
{
RandomBits(h_data[i]);
}
// Allocate device arrays
T *d_data = NULL;
T *d_dummy = NULL;
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES));
CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice));
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_dummy, sizeof(T) * DUMMY_TEST_VALUES));
CubDebugExit(cudaMemcpy(d_dummy, h_data + DUMMY_OFFSET, sizeof(T) * DUMMY_TEST_VALUES, cudaMemcpyHostToDevice));
// Initialize reference data
T h_reference[8];
h_reference[0] = h_data[0]; // Value at offset 0
h_reference[1] = h_data[100]; // Value at offset 100
h_reference[2] = h_data[1000]; // Value at offset 1000
h_reference[3] = h_data[10000]; // Value at offset 10000
h_reference[4] = h_data[1]; // Value at offset 1
h_reference[5] = h_data[21]; // Value at offset 21
h_reference[6] = h_data[11]; // Value at offset 11
h_reference[7] = h_data[0]; // Value at offset 0;
// Create and bind obj-based test iterator
TexObjInputIterator<T> d_obj_itr;
CubDebugExit(d_obj_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES));
Test(d_obj_itr, h_reference);
#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer
//
// Test with thrust::copy_if()
//
T *d_copy = NULL;
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES));
thrust::device_ptr<T> d_copy_wrapper(d_copy);
CubDebugExit(cudaMemset(d_copy, 0, sizeof(T) * TEST_VALUES));
thrust::copy_if(d_obj_itr, d_obj_itr + TEST_VALUES, d_copy_wrapper, SelectOp());
int compare = CompareDeviceResults(h_data, d_copy, TEST_VALUES, g_verbose, g_verbose);
printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS");
AssertEquals(0, compare);
// Cleanup
CubDebugExit(d_obj_itr.UnbindTexture());
if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy));
#endif // THRUST_VERSION
if (h_data) delete[] h_data;
if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data));
if (d_dummy) CubDebugExit(g_allocator.DeviceFree(d_dummy));
}
#if CUDART_VERSION >= 5050
/**
* Test tex-ref texture iterator
*/
template <typename T, typename CastT>
void TestTexRef()
{
printf("\nTesting tex-ref iterator on type %s\n", typeid(T).name()); fflush(stdout);
//
// Test iterator manipulation in kernel
//
constexpr int TEST_VALUES = 11000;
constexpr unsigned int DUMMY_OFFSET = 500;
constexpr unsigned int DUMMY_TEST_VALUES = TEST_VALUES - DUMMY_OFFSET;
T *h_data = new T[TEST_VALUES];
for (int i = 0; i < TEST_VALUES; ++i)
{
RandomBits(h_data[i]);
}
// Allocate device arrays
T *d_data = NULL;
T *d_dummy = NULL;
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES));
CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice));
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_dummy, sizeof(T) * DUMMY_TEST_VALUES));
CubDebugExit(cudaMemcpy(d_dummy, h_data + DUMMY_OFFSET, sizeof(T) * DUMMY_TEST_VALUES, cudaMemcpyHostToDevice));
// Initialize reference data
T h_reference[8];
h_reference[0] = h_data[0]; // Value at offset 0
h_reference[1] = h_data[100]; // Value at offset 100
h_reference[2] = h_data[1000]; // Value at offset 1000
h_reference[3] = h_data[10000]; // Value at offset 10000
h_reference[4] = h_data[1]; // Value at offset 1
h_reference[5] = h_data[21]; // Value at offset 21
h_reference[6] = h_data[11]; // Value at offset 11
h_reference[7] = h_data[0]; // Value at offset 0;
// Create and bind ref-based test iterator
TexRefInputIterator<T, __LINE__> d_ref_itr;
CubDebugExit(d_ref_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES));
// Create and bind dummy iterator of same type to check with interferance
TexRefInputIterator<T, __LINE__> d_ref_itr2;
CubDebugExit(d_ref_itr2.BindTexture((CastT*) d_dummy, sizeof(T) * DUMMY_TEST_VALUES));
Test(d_ref_itr, h_reference);
#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer
//
// Test with thrust::copy_if()
//
T *d_copy = NULL;
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES));
thrust::device_ptr<T> d_copy_wrapper(d_copy);
CubDebugExit(cudaMemset(d_copy, 0, sizeof(T) * TEST_VALUES));
thrust::copy_if(d_ref_itr, d_ref_itr + TEST_VALUES, d_copy_wrapper, SelectOp());
int compare = CompareDeviceResults(h_data, d_copy, TEST_VALUES, g_verbose, g_verbose);
printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS");
AssertEquals(0, compare);
if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy));
#endif // THRUST_VERSION
CubDebugExit(d_ref_itr.UnbindTexture());
CubDebugExit(d_ref_itr2.UnbindTexture());
if (h_data) delete[] h_data;
if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data));
if (d_dummy) CubDebugExit(g_allocator.DeviceFree(d_dummy));
}
/**
* Test texture transform iterator
*/
template <typename T, typename CastT>
void TestTexTransform()
{
printf("\nTesting tex-transform iterator on type %s\n", typeid(T).name()); fflush(stdout);
//
// Test iterator manipulation in kernel
//
constexpr int TEST_VALUES = 11000;
T *h_data = new T[TEST_VALUES];
for (int i = 0; i < TEST_VALUES; ++i)
{
InitValue(INTEGER_SEED, h_data[i], i);
}
// Allocate device arrays
T *d_data = NULL;
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES));
CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice));
TransformOp<T> op;
// Initialize reference data
T h_reference[8];
h_reference[0] = op(h_data[0]); // Value at offset 0
h_reference[1] = op(h_data[100]); // Value at offset 100
h_reference[2] = op(h_data[1000]); // Value at offset 1000
h_reference[3] = op(h_data[10000]); // Value at offset 10000
h_reference[4] = op(h_data[1]); // Value at offset 1
h_reference[5] = op(h_data[21]); // Value at offset 21
h_reference[6] = op(h_data[11]); // Value at offset 11
h_reference[7] = op(h_data[0]); // Value at offset 0;
// Create and bind texture iterator
typedef TexRefInputIterator<T, __LINE__> TextureIterator;
TextureIterator d_tex_itr;
CubDebugExit(d_tex_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES));
// Create transform iterator
TransformInputIterator<T, TransformOp<T>, TextureIterator> xform_itr(d_tex_itr, op);
Test(xform_itr, h_reference);
#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer
//
// Test with thrust::copy_if()
//
T *h_copy = new T[TEST_VALUES];
for (int i = 0; i < TEST_VALUES; ++i)
h_copy[i] = op(h_data[i]);
T *d_copy = NULL;
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES));
thrust::device_ptr<T> d_copy_wrapper(d_copy);
thrust::copy_if(xform_itr, xform_itr + TEST_VALUES, d_copy_wrapper, SelectOp());
int compare = CompareDeviceResults(h_copy, d_copy, TEST_VALUES, g_verbose, g_verbose);
printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS");
AssertEquals(0, compare);
// Cleanup
if (h_copy) delete[] h_copy;
if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy));
#endif // THRUST_VERSION
CubDebugExit(d_tex_itr.UnbindTexture());
if (h_data) delete[] h_data;
if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data));
}
#endif // CUDART_VERSION
/**
* Run non-integer tests
*/
template <typename T, typename CastT>
void Test(Int2Type<false> /* is_integer */)
{
TestModified<T, CastT>();
TestTransform<T, CastT>();
#if CUB_CDP
// Test tex-obj iterators if CUDA dynamic parallelism enabled
TestTexObj<T, CastT>(type_string);
#endif // CUB_CDP
#if CUDART_VERSION >= 5050
// Test tex-ref iterators for CUDA 5.5
TestTexRef<T, CastT>();
TestTexTransform<T, CastT>();
#endif // CUDART_VERSION
}
/**
* Run integer tests
*/
template <typename T, typename CastT>
void Test(Int2Type<true> /* is_integer */)
{
TestConstant<T>(0);
TestConstant<T>(99);
TestCounting<T>(0);
TestCounting<T>(99);
// Run non-integer tests
Test<T, CastT>(Int2Type<false>());
}
/**
* Run tests
*/
template <typename T>
void Test()
{
enum {
IS_INTEGER = (Traits<T>::CATEGORY == SIGNED_INTEGER) || (Traits<T>::CATEGORY == UNSIGNED_INTEGER)
};
// Test non-const type
Test<T, T>(Int2Type<IS_INTEGER>());
// Test non-const type
Test<T, const T>(Int2Type<IS_INTEGER>());
}
/**
* Main
*/
int main(int argc, char** argv)
{
// Initialize command line
CommandLineArgs args(argc, argv);
g_verbose = args.CheckCmdLineFlag("v");
// Print usage
if (args.CheckCmdLineFlag("help"))
{
printf("%s "
"[--device=<device-id>] "
"[--v] "
"\n", argv[0]);
exit(0);
}
// Initialize device
CubDebugExit(args.DeviceInit());
// Get ptx version
int ptx_version = 0;
CubDebugExit(PtxVersion(ptx_version));
// Evaluate different data types
Test<char>();
Test<short>();
Test<int>();
Test<long>();
Test<long long>();
Test<float>();
if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted
Test<double>();
Test<char2>();
Test<short2>();
Test<int2>();
Test<long2>();
Test<longlong2>();
Test<float2>();
if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted
Test<double2>();
Test<char3>();
Test<short3>();
Test<int3>();
Test<long3>();
Test<longlong3>();
Test<float3>();
if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted
Test<double3>();
Test<char4>();
Test<short4>();
Test<int4>();
Test<long4>();
Test<longlong4>();
Test<float4>();
if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted
Test<double4>();
Test<TestFoo>();
Test<TestBar>();
printf("\nTest complete\n"); fflush(stdout);
return 0;
}