LIVE / thrust /dependencies /cub /test /test_grid_barrier.cu
Xu Ma
update
1c3c0d9
raw
history blame
5.13 kB
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/
/******************************************************************************
* Test evaluation for software global barrier throughput
******************************************************************************/
// Ensure printing of CUDA runtime errors to console
#define CUB_STDERR
#include <stdio.h>
#include <cub/grid/grid_barrier.cuh>
#include "test_util.h"
using namespace cub;
//---------------------------------------------------------------------
// Test kernels
//---------------------------------------------------------------------
/**
 * Barrier-throughput kernel.
 *
 * Each thread that runs this kernel calls global_barrier.Sync() once per
 * iteration, \p iterations times in total (zero times when \p iterations
 * is not positive).
 *
 * \param global_barrier  Software global barrier object, passed by value
 * \param iterations      Number of barrier crossings to perform
 */
__global__ void Kernel(
    GridBarrier global_barrier,
    int         iterations)
{
    // Count down instead of up; the number of Sync() calls is unchanged.
    int remaining = iterations;
    while (remaining > 0)
    {
        global_barrier.Sync();
        --remaining;
    }
}
//---------------------------------------------------------------------
// Main
//---------------------------------------------------------------------
/**
 * Main
 *
 * Launches Kernel with a configurable grid/block shape, times the given
 * number of software global barrier crossings, and prints the total and
 * per-iteration elapsed time.
 *
 * Command-line options:
 *   --device=<device-id>
 *   --i=<iterations>            (default 10000)
 *   --grid-size=<grid-size>     (default -1: derive from occupancy * SM count)
 *   --block-size=<block-size>   (default 128)
 *   --help                      print usage and exit
 *
 * Returns the cudaError_t produced by the final device synchronization
 * (cudaSuccess == 0 on success).
 */
int main(int argc, char** argv)
{
    cudaError_t retval = cudaSuccess;

    // Defaults
    int iterations = 10000;
    int block_size = 128;
    int grid_size  = -1;    // -1 means "choose automatically"

    // Initialize command line
    CommandLineArgs args(argc, argv);

    // Get args
    args.GetCmdLineArgument("i", iterations);
    args.GetCmdLineArgument("grid-size", grid_size);
    args.GetCmdLineArgument("block-size", block_size);

    // Print usage
    if (args.CheckCmdLineFlag("help"))
    {
        printf("%s "
            "[--device=<device-id>] "
            "[--i=<iterations>] "
            "[--grid-size=<grid-size>] "
            "[--block-size=<block-size>]"
            "\n", argv[0]);
        exit(0);
    }

    // Reject an explicit grid size that can never be launched
    // (-1 is reserved as the "auto" sentinel).
    if ((grid_size != -1) && (grid_size < 1))
    {
        fprintf(stderr, "Invalid --grid-size=%d (must be >= 1)\n", grid_size);
        exit(1);
    }

    // Initialize device
    CubDebugExit(args.DeviceInit());

    // Get device ordinal
    int device_ordinal;
    CubDebugExit(cudaGetDevice(&device_ordinal));

    // Get device SM version (the value itself is not used further below)
    int sm_version;
    CubDebugExit(SmVersion(sm_version, device_ordinal));

    // Get SM properties
    int sm_count, max_block_threads, max_sm_occupancy;
    CubDebugExit(cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal));
    CubDebugExit(cudaDeviceGetAttribute(&max_block_threads, cudaDevAttrMaxThreadsPerBlock, device_ordinal));
    CubDebugExit(MaxSmOccupancy(max_sm_occupancy, EmptyKernel<void>, 32));

    // Validate the block size before it is used as a divisor and as a
    // launch dimension.
    if ((block_size < 1) || (block_size > max_block_threads))
    {
        fprintf(stderr, "Invalid --block-size=%d (must be in [1, %d])\n",
            block_size, max_block_threads);
        exit(1);
    }

    // Compute grid size and occupancy
    int occupancy = CUB_MIN((max_block_threads / block_size), max_sm_occupancy);
    if (grid_size == -1)
    {
        grid_size = occupancy * sm_count;
    }
    else
    {
        // User-specified grid: report the implied blocks-per-SM figure
        occupancy = grid_size / sm_count;
    }

    printf("Initializing software global barrier for Kernel<<<%d,%d>>> with %d occupancy\n",
        grid_size, block_size, occupancy);
    fflush(stdout);

    // Init global barrier
    GridBarrierLifetime global_barrier;
    global_barrier.Setup(grid_size);

    // Time kernel
    GpuTimer gpu_timer;
    gpu_timer.Start();
    Kernel<<<grid_size, block_size>>>(global_barrier, iterations);

    // Kernel launches do not return a status; fetch launch-configuration
    // errors explicitly so a failed launch is not silently timed.
    cudaError_t launch_error = cudaGetLastError();
    gpu_timer.Stop();
    CubDebugExit(launch_error);

    // Wait for the kernel to finish and pick up any execution error.
    // (cudaThreadSynchronize() is deprecated in favor of cudaDeviceSynchronize().)
    retval = CubDebug(cudaDeviceSynchronize());

    // Output timing results
    float elapsed_millis = gpu_timer.ElapsedMillis();

    // Guard the average against a non-positive iteration count
    float avg_elapsed = (iterations > 0) ?
        elapsed_millis / float(iterations) :
        0.0f;

    printf("%d iterations, %f total elapsed millis, %f avg elapsed millis\n",
        iterations,
        elapsed_millis,
        avg_elapsed);

    return retval;
}