|
35 | 35 |
|
36 | 36 | #pragma push_macro("__MAKE_SHUFFLES")
|
// Generates an overload set named __FnName for int, float, unsigned int,
// long long, unsigned long long and double.
//
// Macro parameters:
//   __FnName          name of the generated overloads (e.g. __shfl).
//   __IntIntrinsic    intrinsic called by the int overload.
//   __FloatIntrinsic  intrinsic called by the float overload.
//   __Mask            constant OR-ed into the third operand handed to the
//                     intrinsic.
//
// Every overload has the signature
//   T __FnName(T __val, int __offset, int __width = warpSize)
//
// The int and float overloads forward directly to the intrinsics, passing
// ((warpSize - __width) << 8) | __Mask as the third operand.  All other
// overloads are layered on top of those:
//   * unsigned int        -> cast to int, call the int overload, cast back.
//   * long long           -> bit-copied into two ints, each half goes through
//                            the int overload, then bit-copied back.
//   * unsigned long long  -> cast to long long, call the long long overload.
//   * double              -> bit-copied into a long long, call the long long
//                            overload, bit-copied back.
//
// NOTE: comments inside the macro body must be /* ... */ so that the line
// continuations are not swallowed.
#define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask)    \
  inline __device__ int __FnName(int __val, int __offset,                      \
                                 int __width = warpSize) {                     \
    return __IntIntrinsic(__val, __offset,                                     \
                          ((warpSize - __width) << 8) | (__Mask));             \
  }                                                                            \
  inline __device__ float __FnName(float __val, int __offset,                  \
                                   int __width = warpSize) {                   \
    return __FloatIntrinsic(__val, __offset,                                   \
                            ((warpSize - __width) << 8) | (__Mask));           \
  }                                                                            \
  inline __device__ unsigned int __FnName(unsigned int __val, int __offset,    \
                                          int __width = warpSize) {            \
    return static_cast<unsigned int>(                                          \
        ::__FnName(static_cast<int>(__val), __offset, __width));               \
  }                                                                            \
  inline __device__ long long __FnName(long long __val, int __offset,          \
                                       int __width = warpSize) {               \
    struct __Bits {                                                            \
      int __a, __b;                                                            \
    };                                                                         \
    _Static_assert(sizeof(__val) == sizeof(__Bits));                           \
    _Static_assert(sizeof(__Bits) == 2 * sizeof(int));                         \
    __Bits __tmp;                                                              \
    /* Destination is __tmp: load the input's bits into the two halves. */     \
    memcpy(&__tmp, &__val, sizeof(__val));                                     \
    __tmp.__a = ::__FnName(__tmp.__a, __offset, __width);                      \
    __tmp.__b = ::__FnName(__tmp.__b, __offset, __width);                      \
    long long __ret;                                                           \
    memcpy(&__ret, &__tmp, sizeof(__tmp));                                     \
    return __ret;                                                              \
  }                                                                            \
  inline __device__ unsigned long long __FnName(                               \
      unsigned long long __val, int __offset, int __width = warpSize) {        \
    /* Cast to the signed type so the long long overload is selected; */       \
    /* casting to unsigned long long would make this overload call itself. */  \
    return static_cast<unsigned long long>(                                    \
        ::__FnName(static_cast<long long>(__val), __offset, __width));         \
  }                                                                            \
  inline __device__ double __FnName(double __val, int __offset,                \
                                    int __width = warpSize) {                  \
    long long __tmp;                                                           \
    _Static_assert(sizeof(__tmp) == sizeof(__val));                            \
    memcpy(&__tmp, &__val, sizeof(__val));                                     \
    __tmp = ::__FnName(__tmp, __offset, __width);                              \
    double __ret;                                                              \
    memcpy(&__ret, &__tmp, sizeof(__ret));                                     \
    return __ret;                                                              \
  }

__MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f);
|
|
0 commit comments