GameServerX

Running

App Files Files Community

GameServerX / MLPY /Lib /site-packages /torch /include /c10 /macros /Macros.h

Kano001

Upload 5252 files

c61ccee verified 10 months ago

raw

history blame

20.1 kB

	#ifndef C10_MACROS_MACROS_H_
	#define C10_MACROS_MACROS_H_
	#include <cassert>

	/* Main entry for c10/macros.
	*
	* In your code, include c10/macros/Macros.h directly, instead of individual
	* files in this folder.
	*/

	// For build systems that do not directly depend on CMake and directly build
	// from the source directory (such as Buck), one may not have a cmake_macros.h
	// file at all. In this case, the build system is responsible for providing
	// correct macro definitions corresponding to the cmake_macros.h.in file.
	//
	// In such scenarios, one should define the macro
	// C10_USING_CUSTOM_GENERATED_MACROS
	// to inform this header that it does not need to include the cmake_macros.h
	// file.

	#ifndef C10_USING_CUSTOM_GENERATED_MACROS
	#include <c10/macros/cmake_macros.h>
	#endif // C10_USING_CUSTOM_GENERATED_MACROS

	#include <c10/macros/Export.h>

	#if defined(__clang__)
	#define __ubsan_ignore_float_divide_by_zero__ \
	__attribute__((no_sanitize("float-divide-by-zero")))
	#define __ubsan_ignore_undefined__ __attribute__((no_sanitize("undefined")))
	#define __ubsan_ignore_signed_int_overflow__ \
	__attribute__((no_sanitize("signed-integer-overflow")))
	#define __ubsan_ignore_pointer_overflow__ \
	__attribute__((no_sanitize("pointer-overflow")))
	#define __ubsan_ignore_function__ __attribute__((no_sanitize("function")))
	#else
	#define __ubsan_ignore_float_divide_by_zero__
	#define __ubsan_ignore_undefined__
	#define __ubsan_ignore_signed_int_overflow__
	#define __ubsan_ignore_pointer_overflow__
	#define __ubsan_ignore_function__
	#endif

	// Detect address sanitizer as some stuff doesn't work with it
	#undef C10_ASAN_ENABLED

	// for clang
	#if defined(__has_feature)
	#if ((__has_feature(address_sanitizer)))
	#define C10_ASAN_ENABLED 1
	#endif
	#endif

	// for gcc
	#if defined(__SANITIZE_ADDRESS__)
	#if __SANITIZE_ADDRESS__
	#if !defined(C10_ASAN_ENABLED)
	#define C10_ASAN_ENABLED 1
	#endif
	#endif
	#endif

	#if !defined(C10_ASAN_ENABLED)
	#define C10_ASAN_ENABLED 0
	#endif

	// Disable the copy and assignment operator for a class. Note that this will
	// disable the usage of the class in std containers.
	#define C10_DISABLE_COPY_AND_ASSIGN(classname) \
	classname(const classname&) = delete; \
	classname& operator=(const classname&) = delete

	#define C10_CONCATENATE_IMPL(s1, s2) s1##s2
	#define C10_CONCATENATE(s1, s2) C10_CONCATENATE_IMPL(s1, s2)

	#define C10_MACRO_EXPAND(args) args

	#define C10_STRINGIZE_IMPL(x) #x
	#define C10_STRINGIZE(x) C10_STRINGIZE_IMPL(x)

	/**
	* C10_ANONYMOUS_VARIABLE(str) introduces an identifier starting with
	* str and ending with a number that varies with the line.
	*/
	#ifdef __COUNTER__
	#define C10_UID __COUNTER__
	#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __COUNTER__)
	#else
	#define C10_UID __LINE__
	#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __LINE__)
	#endif

	#ifdef __has_cpp_attribute
	#define C10_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
	#else
	#define C10_HAS_CPP_ATTRIBUTE(x) (0)
	#endif

	/// C10_NODISCARD - Warn if a type or return value is discarded.

	// Technically, we should check if __cplusplus > 201402L here, because
	// [[nodiscard]] is only defined in C++17. However, some compilers
	// we care about don't advertise being C++17 (e.g., clang), but
	// support the attribute anyway. In fact, this is not just a good idea,
	// it's the law: clang::warn_unused_result doesn't work on nvcc + clang
	// and the best workaround for this case is to use [[nodiscard]]
	// instead; see https://github.com/pytorch/pytorch/issues/13118
	//
	// Note to future editors: if you have noticed that a compiler is
	// misbehaving (e.g., it advertises support, but the support doesn't
	// actually work, or it is emitting warnings). Some compilers which
	// are strict about the matter include MSVC, which will complain:
	//
	// error C2429: attribute 'nodiscard' requires compiler flag '/std:c++latest'
	//
	// Exhibits:
	// - MSVC 19.14: https://godbolt.org/z/Dzd7gn (requires /std:c++latest)
	// - Clang 8.0.0: https://godbolt.org/z/3PYL4Z (always advertises support)
	// - gcc 8.3: https://godbolt.org/z/4tLMQS (always advertises support)
	#if C10_HAS_CPP_ATTRIBUTE(nodiscard)
	#define C10_NODISCARD [[nodiscard]]
	// Workaround for llvm.org/PR23435, since clang 3.6 and below emit a spurious
	// error when __has_cpp_attribute is given a scoped attribute in C mode.
	#elif __cplusplus && C10_HAS_CPP_ATTRIBUTE(clang::warn_unused_result)
	// TODO: It's possible this is still triggering
	// https://github.com/pytorch/pytorch/issues/13118 on Windows; if it is, better
	// fix it.
	#define C10_NODISCARD [[clang::warn_unused_result]]
	#else
	#define C10_NODISCARD
	#endif

	// suppress an unused variable.
	#if defined(_MSC_VER) && !defined(__clang__)
	#define C10_UNUSED __pragma(warning(suppress : 4100 4101))
	#else
	#define C10_UNUSED __attribute__((__unused__))
	#endif //_MSC_VER

	#if !defined(__has_attribute)
	#define __has_attribute(x) 0
	#endif

	// Direct port of LLVM_ATTRIBUTE_USED.
	#if __has_attribute(used)
	#define C10_USED __attribute__((__used__))
	#else
	#define C10_USED
	#endif

	#define C10_RESTRICT __restrict

	// Simply define the namespace, in case a dependent library want to refer to
	// the c10 namespace but not any nontrivial files.
	namespace c10 {}
	namespace c10::cuda {}
	namespace c10::hip {}
	namespace c10::xpu {}

	// Since C10 is the core library for caffe2 (and aten), we will simply reroute
	// all abstractions defined in c10 to be available in caffe2 as well.
	// This is only for backwards compatibility. Please use the symbols from the
	// c10 namespace where possible.
	namespace caffe2 {
	using namespace c10;
	}
	namespace at {
	using namespace c10;
	}
	namespace at::cuda {
	using namespace c10::cuda;
	} // namespace at::cuda

	// WARNING!!! THIS IS A GIANT HACK!!!
	// This line means you cannot simultaneously include c10/hip
	// and c10/cuda and then use them from the at::cuda namespace.
	// This is true in practice, because HIPIFY works inplace on
	// files in ATen/cuda, so it assumes that c10::hip is available
	// from at::cuda. This namespace makes that happen. When
	// HIPIFY is no longer out-of-place, we can switch the cuda
	// here to hip and everyone is happy.
	namespace at::cuda {
	using namespace c10::hip;
	} // namespace at::cuda

	namespace at::xpu {
	using namespace c10::xpu;
	} // namespace at::xpu

	// C10_LIKELY/C10_UNLIKELY
	//
	// These macros provide parentheses, so you can use these macros as:
	//
	// if C10_LIKELY(some_expr) {
	// ...
	// }
	//
	// NB: static_cast to boolean is mandatory in C++, because __builtin_expect
	// takes a long argument, which means you may trigger the wrong conversion
	// without it.
	//
	#if defined(__GNUC__) \|\| defined(__ICL) \|\| defined(__clang__)
	#define C10_LIKELY(expr) (__builtin_expect(static_cast<bool>(expr), 1))
	#define C10_UNLIKELY(expr) (__builtin_expect(static_cast<bool>(expr), 0))
	#else
	#define C10_LIKELY(expr) (expr)
	#define C10_UNLIKELY(expr) (expr)
	#endif

	/// C10_NOINLINE - Functions whose declaration is annotated with this will not
	/// be inlined.
	#ifdef __GNUC__
	#define C10_NOINLINE __attribute__((noinline))
	#elif _MSC_VER
	#define C10_NOINLINE __declspec(noinline)
	#else
	#define C10_NOINLINE
	#endif

	#if defined(_MSC_VER)
	#define C10_ALWAYS_INLINE __forceinline
	#elif __has_attribute(always_inline) \|\| defined(__GNUC__)
	#define C10_ALWAYS_INLINE __attribute__((__always_inline__)) inline
	#else
	#define C10_ALWAYS_INLINE inline
	#endif

	#if defined(_MSC_VER)
	#define C10_ATTR_VISIBILITY_HIDDEN
	#elif defined(__GNUC__)
	#define C10_ATTR_VISIBILITY_HIDDEN __attribute__((__visibility__("hidden")))
	#else
	#define C10_ATTR_VISIBILITY_HIDDEN
	#endif

	#define C10_ERASE C10_ALWAYS_INLINE C10_ATTR_VISIBILITY_HIDDEN

	#include <cstdint>

	#ifdef __HIPCC__
	// Unlike CUDA, HIP requires a HIP header to be included for __host__ to work.
	// We do this #include here so that C10_HOST_DEVICE and friends will Just Work.
	// See https://github.com/ROCm-Developer-Tools/HIP/issues/441
	#include <hip/hip_runtime.h>
	#endif

	#if defined(__CUDACC__) \|\| defined(__HIPCC__)
	// Designates functions callable from the host (CPU) and the device (GPU)
	#define C10_HOST_DEVICE __host__ __device__
	#define C10_DEVICE __device__
	#define C10_HOST __host__
	// constants from
	// (https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications)
	// The maximum number of threads per multiprocessor is 1024 for Turing
	// architecture (7.5), 1536 for Geforce Ampere (8.6)/Jetson Orin (8.7), and
	// 2048 for all other architectures. You'll get warnings if you exceed these
	// constants. Hence, the following macros adjust the input values from the user
	// to resolve potential warnings.
	#if __CUDA_ARCH__ == 750
	constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 1024;
	#elif __CUDA_ARCH__ == 860 \|\| __CUDA_ARCH__ == 870 \|\| __CUDA_ARCH__ == 890
	constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 1536;
	#else
	constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 2048;
	#endif
	// CUDA_MAX_THREADS_PER_BLOCK is same for all architectures currently
	constexpr uint32_t CUDA_MAX_THREADS_PER_BLOCK = 1024;
	// CUDA_THREADS_PER_BLOCK_FALLBACK is the "canonical fallback" choice of block
	// size. 256 is a good number for this fallback and should give good occupancy
	// and versatility across all architectures.
	constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256;
	// NOTE: if you are thinking of constexpr-ify the inputs to launch bounds, it
	// turns out that although __launch_bounds__ can take constexpr, it
	// can't take a constexpr that has anything to do with templates.
	// Currently we use launch_bounds that depend on template arguments in
	// Loops.cuh, Reduce.cuh and LossCTC.cuh. Hence, C10_MAX_THREADS_PER_BLOCK
	// and C10_MIN_BLOCKS_PER_SM are kept as macros.
	// Suppose you were planning to write __launch_bounds__(a, b), based on your
	// performance tuning on a modern GPU. Instead, you should write
	// __launch_bounds__(C10_MAX_THREADS_PER_BLOCK(a), C10_MIN_BLOCKS_PER_SM(a, b)),
	// which will also properly respect limits on old architectures.
	#define C10_MAX_THREADS_PER_BLOCK(val) \
	(((val) <= CUDA_MAX_THREADS_PER_BLOCK) ? (val) \
	: CUDA_THREADS_PER_BLOCK_FALLBACK)
	#define C10_MIN_BLOCKS_PER_SM(threads_per_block, blocks_per_sm) \
	((((threads_per_block) * (blocks_per_sm) <= CUDA_MAX_THREADS_PER_SM) \
	? (blocks_per_sm) \
	: ((CUDA_MAX_THREADS_PER_SM + (threads_per_block)-1) / \
	(threads_per_block))))
	// C10_LAUNCH_BOUNDS is analogous to __launch_bounds__
	#define C10_LAUNCH_BOUNDS_0 \
	__launch_bounds__( \
	256, 4) // default launch bounds that should give good occupancy and
	// versatility across all architectures.
	#define C10_LAUNCH_BOUNDS_1(max_threads_per_block) \
	__launch_bounds__((C10_MAX_THREADS_PER_BLOCK((max_threads_per_block))))
	#define C10_LAUNCH_BOUNDS_2(max_threads_per_block, min_blocks_per_sm) \
	__launch_bounds__( \
	(C10_MAX_THREADS_PER_BLOCK((max_threads_per_block))), \
	(C10_MIN_BLOCKS_PER_SM((max_threads_per_block), (min_blocks_per_sm))))
	#else
	#define C10_HOST_DEVICE
	#define C10_HOST
	#define C10_DEVICE
	#endif

	#if defined(USE_ROCM)
	#define C10_HIP_HOST_DEVICE __host__ __device__
	#else
	#define C10_HIP_HOST_DEVICE
	#endif

	#if defined(USE_ROCM)
	#define C10_WARP_SIZE warpSize // = 64 or 32 (Defined in hip_runtime.h)
	#else
	#define C10_WARP_SIZE 32
	#endif

	#if defined(_MSC_VER) && _MSC_VER <= 1900
	#define __func__ __FUNCTION__
	#endif

	// CUDA_KERNEL_ASSERT checks the assertion
	// even when NDEBUG is defined. This is useful for important assertions in CUDA
	// code that would otherwise be suppressed when building Release.
	#if defined(__ANDROID__) \|\| defined(__APPLE__) \|\| defined(__FreeBSD__) \|\| \
	(defined(USE_ROCM) && ROCM_VERSION < 40100)
	// Those platforms do not support assert()
	#define CUDA_KERNEL_ASSERT(cond)
	#define SYCL_KERNEL_ASSERT(cond)
	#elif defined(_MSC_VER)
	#if defined(NDEBUG)
	extern "C" {
	C10_IMPORT
	#if defined(__SYCL_DEVICE_ONLY__)
	extern SYCL_EXTERNAL void _wassert(
	const wchar_t* wexpr,
	const wchar_t* wfile,
	unsigned line);
	#else
	#if defined(__CUDA_ARCH__)
	__host__ __device__
	#endif // __CUDA_ARCH__
	void
	_wassert(wchar_t const* _Message, wchar_t const* _File, unsigned _Line);
	#endif // __SYCL_DEVICE_ONLY__
	}
	#endif // NDEBUG
	#define CUDA_KERNEL_ASSERT(cond) \
	if (C10_UNLIKELY(!(cond))) { \
	(void)(_wassert( \
	_CRT_WIDE(#cond), \
	_CRT_WIDE(__FILE__), \
	static_cast<unsigned>(__LINE__)), \
	0); \
	}
	#define SYCL_KERNEL_ASSERT(cond) \
	if (C10_UNLIKELY(!(cond))) { \
	(void)(_wassert( \
	_CRT_WIDE(#cond), \
	_CRT_WIDE(__FILE__), \
	static_cast<unsigned>(__LINE__)), \
	0); \
	}
	#else // __APPLE__, _MSC_VER
	#if defined(NDEBUG)
	extern "C" {
	#if defined(__SYCL_DEVICE_ONLY__)
	extern SYCL_EXTERNAL void __assert_fail(
	const char* expr,
	const char* file,
	unsigned int line,
	const char* func);
	#else // __SYCL_DEVICE_ONLY__
	#if (defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__)))
	// CUDA supports __assert_fail function which are common for both device
	// and host side code.
	__host__ __device__
	#endif

	// This forward declaration matching the declaration of __assert_fail
	// exactly how it is in glibc in case parts of the program are compiled with
	// different NDEBUG settings. Otherwise we might get 'ambiguous declaration'
	// error. Note: On ROCm - this declaration serves for host side compilation.
	void
	__assert_fail(
	const char* assertion,
	const char* file,
	unsigned int line,
	const char* function) noexcept __attribute__((__noreturn__));

	#endif // __SYCL_DEVICE_ONLY__
	}
	#endif // NDEBUG
	// ROCm disable kernel assert by default
	#if !defined(C10_USE_ROCM_KERNEL_ASSERT) and defined(USE_ROCM)
	#define CUDA_KERNEL_ASSERT(cond)
	#define SYCL_KERNEL_ASSERT(cond)
	#else
	#define CUDA_KERNEL_ASSERT(cond) \
	if (C10_UNLIKELY(!(cond))) { \
	__assert_fail( \
	#cond, __FILE__, static_cast<unsigned int>(__LINE__), __func__); \
	}
	#define SYCL_KERNEL_ASSERT(cond) \
	if (C10_UNLIKELY(!(cond))) { \
	__assert_fail( \
	#cond, __FILE__, static_cast<unsigned int>(__LINE__), __func__); \
	}
	#endif // C10_USE_ROCM_KERNEL_ASSERT and USE_ROCM
	#endif // __APPLE__

	#ifdef __APPLE__
	#include <TargetConditionals.h>
	#endif

	#if defined(__ANDROID__)
	#define C10_ANDROID 1
	#define C10_MOBILE 1
	#elif ( \
	defined(__APPLE__) && \
	(TARGET_IPHONE_SIMULATOR \|\| TARGET_OS_SIMULATOR \|\| TARGET_OS_IPHONE))
	#define C10_IOS 1
	#define C10_MOBILE 1
	#endif // ANDROID / IOS

	#if defined(C10_MOBILE) && C10_MOBILE
	#define C10_ALWAYS_INLINE_UNLESS_MOBILE inline
	#else
	#define C10_ALWAYS_INLINE_UNLESS_MOBILE C10_ALWAYS_INLINE
	#endif

	#if defined(__CUDA_ARCH__)
	#if defined(_MSC_VER) && defined(__CUDACC__)
	#define CONSTEXPR_EXCEPT_WIN_CUDA const
	#define C10_HOST_CONSTEXPR_EXCEPT_WIN_CUDA __host__

	// Note [static constexpr char* members for windows NVCC]
	// The Windows NVCC compiler doesn't handle static constexpr class members,
	// although it's fixed in a later version.
	// (see
	// https://developercommunity.visualstudio.com/t/intellisense-error-c11-static-constexpr-member-ini/245425)
	//
	// If we want to ensure that our field is static under all builds, then we need
	// to work around it specifically for windows NVCC by making it (a) const, (b)
	// defined outside of the class definition We need to define it outside of the
	// class definition because of the C++ standard; char* is not an integral type
	// (see
	// https://stackoverflow.com/questions/24278473/intellisense-a-member-of-type-const-char-const-cannot-have-an-in-class-in)
	//
	// So instead of this:
	// struct Foo {
	// static constexpr const char* name = "foo";
	// }
	// In Windows NVCC, we end up with this:
	// struct Foo {
	// static const char* name;
	// }
	// const char* Foo::name = "foo";
	//
	// This gives us a small perf hit for any code that wants to access these field
	// members, but right now it isn't used in any perf-critical code paths.
	#define STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(field, val) \
	static const char* field;
	#define STATIC_CONST_STR_OUT_OF_LINE_FOR_WIN_CUDA(cls, field, val) \
	const char* cls::field = val;
	#else
	#define CONSTEXPR_EXCEPT_WIN_CUDA constexpr
	#define C10_HOST_CONSTEXPR_EXCEPT_WIN_CUDA __host__

	#define STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(field, val) \
	static constexpr const char* field = val;
	#define STATIC_CONST_STR_OUT_OF_LINE_FOR_WIN_CUDA(cls, field, val)
	#endif
	#else
	#if defined(_MSC_VER) && defined(__CUDACC__)
	#define CONSTEXPR_EXCEPT_WIN_CUDA const
	#define C10_HOST_CONSTEXPR_EXCEPT_WIN_CUDA

	#define STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(field, val) \
	static const char* field;
	#define STATIC_CONST_STR_OUT_OF_LINE_FOR_WIN_CUDA(cls, field, val) \
	const char* cls::field = val;
	#else
	#define CONSTEXPR_EXCEPT_WIN_CUDA constexpr
	#define C10_HOST_CONSTEXPR_EXCEPT_WIN_CUDA constexpr

	#define STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(field, val) \
	static constexpr const char* field = val;
	#define STATIC_CONST_STR_OUT_OF_LINE_FOR_WIN_CUDA(cls, field, val)
	#endif
	#endif

	#ifndef HAS_DEMANGLE
	#if defined(__ANDROID__) \|\| defined(_WIN32) \|\| defined(__EMSCRIPTEN__)
	#define HAS_DEMANGLE 0
	#elif defined(__APPLE__) && \
	(TARGET_IPHONE_SIMULATOR \|\| TARGET_OS_SIMULATOR \|\| TARGET_OS_IPHONE)
	#define HAS_DEMANGLE 0
	#else
	#define HAS_DEMANGLE 1
	#endif
	#endif // HAS_DEMANGLE

	#define _C10_PRAGMA__(string) _Pragma(#string)
	#define _C10_PRAGMA_(string) _C10_PRAGMA__(string)

	#ifdef __clang__
	#define C10_CLANG_DIAGNOSTIC_PUSH() _Pragma("clang diagnostic push")
	#define C10_CLANG_DIAGNOSTIC_POP() _Pragma("clang diagnostic pop")
	#define C10_CLANG_DIAGNOSTIC_IGNORE(flag) \
	_C10_PRAGMA_(clang diagnostic ignored flag)
	#define C10_CLANG_HAS_WARNING(flag) __has_warning(flag)
	#else
	#define C10_CLANG_DIAGNOSTIC_PUSH()
	#define C10_CLANG_DIAGNOSTIC_POP()
	#define C10_CLANG_DIAGNOSTIC_IGNORE(flag)
	#define C10_CLANG_HAS_WARNING(flag) 0
	#endif

	#ifdef __clang__

	#define C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(warning) \
	_C10_PRAGMA_(clang diagnostic push) \
	_C10_PRAGMA_(clang diagnostic ignored "-Wunknown-warning-option") \
	_C10_PRAGMA_(clang diagnostic ignored warning)

	#define C10_DIAGNOSTIC_POP() _C10_PRAGMA_(clang diagnostic pop)

	#elif __GNUC__

	#define C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(warning) \
	_C10_PRAGMA_(GCC diagnostic push) \
	_C10_PRAGMA_(GCC diagnostic ignored "-Wpragmas") \
	_C10_PRAGMA_(GCC diagnostic ignored warning)

	#define C10_DIAGNOSTIC_POP() _C10_PRAGMA_(GCC diagnostic pop)

	#else

	#define C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(warning)
	#define C10_DIAGNOSTIC_POP()

	#endif

	#endif // C10_MACROS_MACROS_H_