File size: 8,438 Bytes
7885a28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
#pragma once

// Fallback definitions for the <math.h>-style constants. Each one is only
// defined here when the toolchain has not already supplied it.
#if !defined(M_E)
#define M_E 2.71828182845904523536 // e
#endif

#if !defined(M_LOG2E)
#define M_LOG2E 1.44269504088896340736 // log2(e)
#endif

#if !defined(M_LOG10E)
#define M_LOG10E 0.434294481903251827651 // log10(e)
#endif

#if !defined(M_LN2)
#define M_LN2 0.693147180559945309417 // ln(2)
#endif

#if !defined(M_LN10)
#define M_LN10 2.30258509299404568402 // ln(10)
#endif

#if !defined(M_PI)
#define M_PI 3.14159265358979323846 // pi
#endif

#if !defined(M_PI_2)
#define M_PI_2 1.57079632679489661923 // pi / 2
#endif

#if !defined(M_PI_4)
#define M_PI_4 0.785398163397448309616 // pi / 4
#endif

#if !defined(M_1_PI)
#define M_1_PI 0.318309886183790671538 // 1 / pi
#endif

#if !defined(M_2_PI)
#define M_2_PI 0.636619772367581343076 // 2 / pi
#endif

#if !defined(M_2_SQRTPI)
#define M_2_SQRTPI 1.12837916709551257390 // 2 / sqrt(pi)
#endif

#if !defined(M_SQRT2)
#define M_SQRT2 1.41421356237309504880 // sqrt(2)
#endif

#if !defined(M_SQRT1_2)
#define M_SQRT1_2 0.707106781186547524401 // 1 / sqrt(2)
#endif

#ifdef __CUDACC__
#define XSF_HOST_DEVICE __host__ __device__

#include <cuda/std/cmath>
#include <cuda/std/cstddef>
#include <cuda/std/cstdint>
#include <cuda/std/limits>
#include <cuda/std/tuple>
#include <cuda/std/type_traits>
#include <cuda/std/utility>

// Fall back to the global namespace for functions unsupported under NVRTC JIT
#ifdef _LIBCUDACXX_COMPILER_NVRTC
#include <cuda_runtime.h>
#endif

// Subset of the std:: API used by this library, provided for CUDA builds by forwarding
// to libcu++ (cuda::std) and thrust; a few functions use the global namespace under NVRTC.
namespace std {

// Double-precision math and classification functions, forwarded to cuda::std.
XSF_HOST_DEVICE inline double abs(double num) { return cuda::std::abs(num); }

XSF_HOST_DEVICE inline double exp(double num) { return cuda::std::exp(num); }

XSF_HOST_DEVICE inline double log(double num) { return cuda::std::log(num); }

XSF_HOST_DEVICE inline double sqrt(double num) { return cuda::std::sqrt(num); }

XSF_HOST_DEVICE inline bool isinf(double num) { return cuda::std::isinf(num); }

XSF_HOST_DEVICE inline bool isnan(double num) { return cuda::std::isnan(num); }

XSF_HOST_DEVICE inline bool isfinite(double num) { return cuda::std::isfinite(num); }

XSF_HOST_DEVICE inline double pow(double x, double y) { return cuda::std::pow(x, y); }

XSF_HOST_DEVICE inline double sin(double x) { return cuda::std::sin(x); }

XSF_HOST_DEVICE inline double cos(double x) { return cuda::std::cos(x); }

XSF_HOST_DEVICE inline double tan(double x) { return cuda::std::tan(x); }

XSF_HOST_DEVICE inline double atan(double x) { return cuda::std::atan(x); }

XSF_HOST_DEVICE inline double acos(double x) { return cuda::std::acos(x); }

XSF_HOST_DEVICE inline double sinh(double x) { return cuda::std::sinh(x); }

XSF_HOST_DEVICE inline double cosh(double x) { return cuda::std::cosh(x); }

XSF_HOST_DEVICE inline double asinh(double x) { return cuda::std::asinh(x); }

XSF_HOST_DEVICE inline bool signbit(double x) { return cuda::std::signbit(x); }

// Fallback to global namespace for functions unsupported on NVRTC:
// without _LIBCUDACXX_COMPILER_NVRTC these forward to cuda::std, with it they call
// the global-namespace functions of the same name.
#ifndef _LIBCUDACXX_COMPILER_NVRTC
XSF_HOST_DEVICE inline double ceil(double x) { return cuda::std::ceil(x); }
XSF_HOST_DEVICE inline double floor(double x) { return cuda::std::floor(x); }
XSF_HOST_DEVICE inline double round(double x) { return cuda::std::round(x); }
XSF_HOST_DEVICE inline double trunc(double x) { return cuda::std::trunc(x); }
XSF_HOST_DEVICE inline double fma(double x, double y, double z) { return cuda::std::fma(x, y, z); }
XSF_HOST_DEVICE inline double copysign(double x, double y) { return cuda::std::copysign(x, y); }
XSF_HOST_DEVICE inline double modf(double value, double *iptr) { return cuda::std::modf(value, iptr); }
XSF_HOST_DEVICE inline double fmax(double x, double y) { return cuda::std::fmax(x, y); }
XSF_HOST_DEVICE inline double fmin(double x, double y) { return cuda::std::fmin(x, y); }
XSF_HOST_DEVICE inline double log10(double num) { return cuda::std::log10(num); }
XSF_HOST_DEVICE inline double log1p(double num) { return cuda::std::log1p(num); }
XSF_HOST_DEVICE inline double frexp(double num, int *exp) { return cuda::std::frexp(num, exp); }
XSF_HOST_DEVICE inline double ldexp(double num, int exp) { return cuda::std::ldexp(num, exp); }
XSF_HOST_DEVICE inline double fmod(double x, double y) { return cuda::std::fmod(x, y); }
XSF_HOST_DEVICE inline double nextafter(double from, double to) { return cuda::std::nextafter(from, to); }
#else
XSF_HOST_DEVICE inline double ceil(double x) { return ::ceil(x); }
XSF_HOST_DEVICE inline double floor(double x) { return ::floor(x); }
XSF_HOST_DEVICE inline double round(double x) { return ::round(x); }
XSF_HOST_DEVICE inline double trunc(double x) { return ::trunc(x); }
XSF_HOST_DEVICE inline double fma(double x, double y, double z) { return ::fma(x, y, z); }
XSF_HOST_DEVICE inline double copysign(double x, double y) { return ::copysign(x, y); }
XSF_HOST_DEVICE inline double modf(double value, double *iptr) { return ::modf(value, iptr); }
XSF_HOST_DEVICE inline double fmax(double x, double y) { return ::fmax(x, y); }
XSF_HOST_DEVICE inline double fmin(double x, double y) { return ::fmin(x, y); }
XSF_HOST_DEVICE inline double log10(double num) { return ::log10(num); }
XSF_HOST_DEVICE inline double log1p(double num) { return ::log1p(num); }
XSF_HOST_DEVICE inline double frexp(double num, int *exp) { return ::frexp(num, exp); }
XSF_HOST_DEVICE inline double ldexp(double num, int exp) { return ::ldexp(num, exp); }
XSF_HOST_DEVICE inline double fmod(double x, double y) { return ::fmod(x, y); }
XSF_HOST_DEVICE inline double nextafter(double from, double to) { return ::nextafter(from, to); }
#endif

// Exchanges the values of a and b by delegating to cuda::std::swap.
template <typename T>
XSF_HOST_DEVICE void swap(T &a, T &b) {
    cuda::std::swap(a, b);
}

// Reimplement std::clamp until it's available in CuPy.
// Returns lo when v < lo, hi when v > hi, and v otherwise; the result is returned by value.
// All three arguments are non-const lvalue references of the same type T, so literals and
// temporaries cannot be passed directly. The relation lo <= hi is not checked.
template <typename T>
XSF_HOST_DEVICE constexpr T clamp(T &v, T &lo, T &hi) {
    // The upper branch must yield hi; returning lo here would map every value above the
    // range to the lower bound.
    return v < lo ? lo : (v > hi ? hi : v);
}

template <typename T>
using numeric_limits = cuda::std::numeric_limits<T>;

// Must use thrust for complex types in order to support CuPy
// NOTE(review): no thrust header is included in the visible part of this file; presumably
// the embedding environment makes thrust::complex available before this point - confirm.
template <typename T>
using complex = thrust::complex<T>;

// Complex overloads, forwarded to the thrust functions of the same name.
template <typename T>
XSF_HOST_DEVICE T abs(const complex<T> &z) {
    return thrust::abs(z);
}

template <typename T>
XSF_HOST_DEVICE complex<T> exp(const complex<T> &z) {
    return thrust::exp(z);
}

template <typename T>
XSF_HOST_DEVICE complex<T> log(const complex<T> &z) {
    return thrust::log(z);
}

template <typename T>
XSF_HOST_DEVICE T norm(const complex<T> &z) {
    return thrust::norm(z);
}

template <typename T>
XSF_HOST_DEVICE complex<T> sqrt(const complex<T> &z) {
    return thrust::sqrt(z);
}

template <typename T>
XSF_HOST_DEVICE complex<T> conj(const complex<T> &z) {
    return thrust::conj(z);
}

// Complex base raised to a complex exponent.
template <typename T>
XSF_HOST_DEVICE complex<T> pow(const complex<T> &x, const complex<T> &y) {
    return thrust::pow(x, y);
}

// Complex base raised to a real exponent.
template <typename T>
XSF_HOST_DEVICE complex<T> pow(const complex<T> &x, const T &y) {
    return thrust::pow(x, y);
}

// Other types and utilities
template <typename T>
using is_floating_point = cuda::std::is_floating_point<T>;

template <bool Cond, typename T = void>
using enable_if = cuda::std::enable_if<Cond, T>;

template <typename T>
using decay = cuda::std::decay<T>;

// Accepts the callable type followed by any number of argument types, mirroring the
// parameter list of std::invoke_result; naming only the callable type still works.
template <typename F, typename... Args>
using invoke_result = cuda::std::invoke_result<F, Args...>;

template <typename T1, typename T2>
using pair = cuda::std::pair<T1, T2>;

template <typename... Types>
using tuple = cuda::std::tuple<Types...>;

using cuda::std::ptrdiff_t;
using cuda::std::size_t;
using cuda::std::uint64_t;

// Assertions are disabled here: XSF_ASSERT(a) expands to nothing.
#define XSF_ASSERT(a)

} // namespace std

#else
#define XSF_HOST_DEVICE

#include <algorithm>
#include <cassert>
#include <cmath>
#include <complex>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <limits>
#include <math.h>
#include <tuple>
#include <type_traits>
#include <utility>

// XSF_ASSERT(a) forwards to assert(a) when DEBUG is defined. Otherwise it expands to
// nothing, so the argument expression is not evaluated at all.
#ifdef DEBUG
#define XSF_ASSERT(a) assert(a)
#else
#define XSF_ASSERT(a)
#endif

namespace xsf {

// Math functions from std:: made available as members of xsf.
using std::abs;
using std::copysign;
using std::cos;
using std::exp;
using std::sin;
using std::sqrt;

// Floating-point classification and sign inspection.
using std::isfinite;
using std::isinf;
using std::isnan;
using std::signbit;

// Accessors for the components of a complex number.
using std::imag;
using std::real;

// remove_complex<T>::type is T itself, except that std::complex<U> maps to U.
template <class T>
struct remove_complex {
    typedef T type;
};

template <class U>
struct remove_complex<std::complex<U>> {
    typedef U type;
};

// Shorthand for remove_complex<T>::type.
template <class T>
using remove_complex_t = typename remove_complex<T>::type;

// complex_type<T>::type is std::complex<T>.
template <class T>
struct complex_type {
    typedef std::complex<T> type;
};

// Shorthand for complex_type<T>::type.
template <class T>
using complex_type_t = typename complex_type<T>::type;

// xsf::complex<T> names the same type as complex_type_t<T>.
template <class T>
using complex = typename complex_type<T>::type;

} // namespace xsf

#endif