GameServerX

Running

App Files Files Community

GameServerX / MLPY /Lib /site-packages /torch /include /c10 /cuda /CUDADeviceAssertion.h

Kano001

Upload 5252 files

c61ccee verified 10 months ago

raw

history blame

4.17 kB

	#pragma once

	#include <c10/cuda/CUDAException.h>
	#include <c10/macros/Macros.h>

	namespace c10::cuda {

	#ifdef TORCH_USE_CUDA_DSA
	// Copy string from `src` to `dst`
	static __device__ void dstrcpy(char* dst, const char* src) {
	int i = 0;
	// Copy string from source to destination, ensuring that it
	// isn't longer than `C10_CUDA_DSA_MAX_STR_LEN-1`
	while (*src != '\0' && i++ < C10_CUDA_DSA_MAX_STR_LEN - 1) {
	dst++ = src++;
	}
	*dst = '\0';
	}

	static __device__ void dsa_add_new_assertion_failure(
	DeviceAssertionsData* assertions_data,
	const char* assertion_msg,
	const char* filename,
	const char* function_name,
	const int line_number,
	const uint32_t caller,
	const dim3 block_id,
	const dim3 thread_id) {
	// `assertions_data` may be nullptr if device-side assertion checking
	// is disabled at run-time. If it is disabled at compile time this
	// function will never be called
	if (!assertions_data) {
	return;
	}

	// Atomically increment so other threads can fail at the same time
	// Note that incrementing this means that the CPU can observe that
	// a failure has happened and can begin to respond before we've
	// written information about that failure out to the buffer.
	const auto nid = atomicAdd(&(assertions_data->assertion_count), 1);

	if (nid >= C10_CUDA_DSA_ASSERTION_COUNT) {
	// At this point we're ran out of assertion buffer space.
	// We could print a message about this, but that'd get
	// spammy if a lot of threads did it, so we just silently
	// ignore any other assertion failures. In most cases the
	// failures will all probably be analogous anyway.
	return;
	}

	// Write information about the assertion failure to memory.
	// Note that this occurs only after the `assertion_count`
	// increment broadcasts that there's been a problem.
	auto& self = assertions_data->assertions[nid];
	dstrcpy(self.assertion_msg, assertion_msg);
	dstrcpy(self.filename, filename);
	dstrcpy(self.function_name, function_name);
	self.line_number = line_number;
	self.caller = caller;
	self.block_id[0] = block_id.x;
	self.block_id[1] = block_id.y;
	self.block_id[2] = block_id.z;
	self.thread_id[0] = thread_id.x;
	self.thread_id[1] = thread_id.y;
	self.thread_id[2] = thread_id.z;
	}

	// Emulates a kernel assertion. The assertion won't stop the kernel's progress,
	// so you should assume everything the kernel produces is garbage if there's an
	// assertion failure.
	// NOTE: This assumes that `assertions_data` and `assertion_caller_id` are
	// arguments of the kernel and therefore accessible.
	#define CUDA_KERNEL_ASSERT2(condition) \
	do { \
	if (C10_UNLIKELY(!(condition))) { \
	/* Has an atomic element so threads can fail at the same time */ \
	c10::cuda::dsa_add_new_assertion_failure( \
	assertions_data, \
	C10_STRINGIZE(condition), \
	__FILE__, \
	__FUNCTION__, \
	__LINE__, \
	assertion_caller_id, \
	blockIdx, \
	threadIdx); \
	/* Now that the kernel has failed we early exit the kernel, but */ \
	/* otherwise keep going and rely on the host to check UVM and */ \
	/* determine we've had a problem */ \
	return; \
	} \
	} while (false)
	#else
	#define CUDA_KERNEL_ASSERT2(condition) assert(condition)
	#endif

	} // namespace c10::cuda