#pragma once

#include <ATen/core/Tensor.h>
#include <cstdint>

#ifdef USE_FBGEMM
#include <fbgemm/FbgemmEmbedding.h>
#endif

namespace at::native {
void check_arguments(
    const Tensor& weight,
    const Tensor& indices,
    const Tensor& offsets,
    const int64_t mode,
    const c10::optional<Tensor>& per_sample_weights,
    bool include_last_offset);

void make_bag_size_out(
    Tensor& bag_size_out,
    const Tensor& offsets,
    const Tensor& indices,
    const int64_t mode,
    const bool include_last_offset,
    const bool requires_grad);

void make_max_indices_out(
    Tensor& max_indices_out,
    const Tensor& weight,
    const Tensor& indices,
    const Tensor& offsets,
    const Tensor& bag_size,
    const int64_t mode,
    bool include_last_offset);

void make_offset2bag_out(
    Tensor& offset2bag,
    Tensor& output,
    const Tensor& weight,
    const Tensor& indices,
    const Tensor& offsets,
    const int64_t mode,
    const c10::optional<Tensor>& per_sample_weights,
    const int64_t padding_idx = -1);

#ifdef USE_FBGEMM

template<bool has_weight, typename TIndex, typename TData>
struct _CallbackAndBlockSize {
  using TCallback = typename fbgemm::EmbeddingSpMDMKernelSignature<TData, TIndex, TIndex, TData>::Type;

  int64_t blockSize = -1;
  TCallback callback = nullptr;

  static TCallback generateCallback(int64_t block_size) {
    return fbgemm::GenerateEmbeddingSpMDM<TData, TIndex, TIndex, TData>(
        block_size,
        has_weight,
        /* normalize_by_lengths */false,
        /* prefetch */16,
        /* is_weight_positional */false,
        /* use_offsets */true);
  }

  _CallbackAndBlockSize() = default;

  explicit _CallbackAndBlockSize(c10::optional<int64_t> maybe_block_size)
    : blockSize(maybe_block_size.value_or(-1))
    , callback(maybe_block_size.has_value() ? generateCallback(maybe_block_size.value()) : nullptr)
  {}
};
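
// Illustrative sketch of what a single mixin slot holds (the block size 64 is
// an arbitrary example value, not one used by the implementation):
//
//   _CallbackAndBlockSize<true, int64_t, float> slot(c10::make_optional<int64_t>(64));
//   // slot.blockSize == 64; slot.callback is the FBGEMM-generated kernel for
//   // weighted fp32 lookups over int64 indices.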

template<typename... StorageMixins>
struct _EmbeddingBagKernelCacheImpl : private StorageMixins... {

  _EmbeddingBagKernelCacheImpl() = default;
  // use each of the mixins to store the corresponding kernel and block size
  explicit _EmbeddingBagKernelCacheImpl(c10::optional<int64_t> maybe_block_size)
    : StorageMixins(maybe_block_size)...
  {}

  // this method is thread safe (call sites may call from different threads)
  template<bool has_weight, typename TIndex, typename TData>
  typename _CallbackAndBlockSize<has_weight, TIndex, TData>::TCallback
  getCallback(int64_t block_size) const {
    // if the cache doesn't store the kernel for the incoming block size
    // (i.e. it differs from the one stored in the corresponding mixin),
    // regenerate the kernel (not writing it into the cache, so we avoid locks)
    if (block_size != _CallbackAndBlockSize<has_weight, TIndex, TData>::blockSize) {
      return _CallbackAndBlockSize<has_weight, TIndex, TData>::generateCallback(block_size);
    }
    // else retrieve the cached kernel from the corresponding mixin
    return _CallbackAndBlockSize<has_weight, TIndex, TData>::callback;
  }
};

// instantiate the cache with the list of storage mixins
// for each of the 8 _EmbeddingBagKernelCache* usages in the EmbeddingBag.cpp impl file
using _EmbeddingBagKernelCache = _EmbeddingBagKernelCacheImpl<
    _CallbackAndBlockSize<true, int32_t, float>,
    _CallbackAndBlockSize<false, int32_t, float>,
    _CallbackAndBlockSize<true, int64_t, float>,
    _CallbackAndBlockSize<false, int64_t, float>,
    _CallbackAndBlockSize<true, int32_t, unsigned short>,
    _CallbackAndBlockSize<false, int32_t, unsigned short>,
    _CallbackAndBlockSize<true, int64_t, unsigned short>,
    _CallbackAndBlockSize<false, int64_t, unsigned short>>;

#else

// stub for builds without FBGEMM: the cache stores no kernels
struct _EmbeddingBagKernelCache {
  explicit _EmbeddingBagKernelCache(c10::optional<int64_t> /* maybe_block_size */) {}
};

#endif
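
// Illustrative sketch (USE_FBGEMM builds only; the block sizes are arbitrary
// example values): a lookup matching the cached block size returns the stored
// kernel, while any other block size regenerates a kernel on the spot without
// mutating the cache.
//
//   _EmbeddingBagKernelCache cache(c10::make_optional<int64_t>(128));
//   auto hit  = cache.getCallback</*has_weight=*/false, int64_t, float>(128); // cached
//   auto miss = cache.getCallback</*has_weight=*/false, int64_t, float>(256); // regenerated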

void _embedding_bag_cpu_impl_out(Tensor& output, Tensor& offset2bag,
    Tensor& bag_size, Tensor* max_indices,
    const Tensor& weight, const Tensor& indices,
    const Tensor& offsets, const int64_t mode = 0,
    const c10::optional<Tensor>& per_sample_weights = c10::nullopt,
    bool include_last_offset = false,
    int64_t padding_idx = -1,
    _EmbeddingBagKernelCache* fbgemm_kernel_cache = nullptr);
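
// Usage sketch (illustrative: the tensor names are hypothetical, mode 0 (sum)
// is assumed, and the out tensors are assumed to be prepared by the caller as
// done in EmbeddingBag.cpp):
//
//   at::Tensor output, offset2bag, bag_size, max_indices;
//   _EmbeddingBagKernelCache cache(c10::make_optional<int64_t>(weight.size(1)));
//   at::native::_embedding_bag_cpu_impl_out(
//       output, offset2bag, bag_size, &max_indices,
//       weight, indices, offsets, /*mode=*/0,
//       /*per_sample_weights=*/c10::nullopt,
//       /*include_last_offset=*/false,
//       /*padding_idx=*/-1, &cache);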

void _embedding_bag_cpu_out(
    at::Tensor& output,
    at::Tensor& offset2bag,
    at::Tensor& bag_size,
    at::Tensor* p_max_indices,
    const at::Tensor& weight,
    const at::Tensor& indices,
    const at::Tensor& offsets,
    const bool scale_grad_by_freq,
    const int64_t mode,
    const bool sparse,
    const c10::optional<at::Tensor>& per_sample_weights,
    const bool include_last_offset,
    const c10::optional<int64_t>& padding_idx,
    _EmbeddingBagKernelCache* fbgemm_kernel_cache = nullptr);

} // namespace at::native