Spaces:
Running
Running
using namespace parrots; | |
using phalf = float16; | |
/** atomicAdd **/ | |
static __inline__ __device__ double atomicAdd(double* address, double val) { | |
unsigned long long int* address_as_ull = (unsigned long long int*)address; | |
unsigned long long int old = *address_as_ull, assumed; | |
if (val == 0.0) return __longlong_as_double(old); | |
do { | |
assumed = old; | |
old = atomicCAS(address_as_ull, assumed, | |
__double_as_longlong(val + __longlong_as_double(assumed))); | |
} while (assumed != old); | |
return __longlong_as_double(old); | |
} | |
static __inline__ __device__ float16 atomicAdd(float16* address, float16 val) { | |
unsigned int* aligned = | |
(unsigned int*)((size_t)address - ((size_t)address & 2)); | |
unsigned int old = *aligned; | |
unsigned int assumed; | |
unsigned short old_as_us; | |
do { | |
assumed = old; | |
old_as_us = | |
(unsigned short)((size_t)address & 2 ? old >> 16 : old & 0xffff); | |
float16 tmp; | |
tmp.x = old_as_us; | |
float16 sum = tmp + val; | |
unsigned short sum_as_us = sum.x; | |
// half sum = __float2half_rn(__half2float(__ushort_as_half(old_as_us)) | |
// + (float)(val)); unsigned short sum_as_us = __half_as_ushort(sum); | |
unsigned short sum_as_us = | |
__float2half_rn(__half2float(old_as_us) + (float)(val)); | |
unsigned int sum_as_ui = (size_t)address & 2 | |
? (sum_as_us << 16) | (old & 0xffff) | |
: (old & 0xffff0000) | sum_as_us; | |
old = atomicCAS(aligned, assumed, sum_as_ui); | |
} while (assumed != old); | |
//__half_raw raw = {old_as_us}; | |
// return float16(raw); | |
return *reinterpret_cast<float16*>(&old_as_us); | |
} | |