diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu --- a/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu @@ -382,10 +382,10 @@ INLINE static int64_t Shuffle(unsigned active, int64_t val, int leader) { uint32_t lo, hi; - __kmpc_impl_unpack(val, lo, hi); + __kmpc_impl::Bits::unpack(val, lo, hi); hi = __kmpc_impl_shfl_sync(active, hi, leader); lo = __kmpc_impl_shfl_sync(active, lo, leader); - return __kmpc_impl_pack(lo, hi); + return __kmpc_impl::Bits::pack(lo, hi); } INLINE static uint64_t NextIter() { diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu --- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu @@ -29,10 +29,10 @@ EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size) { uint32_t lo, hi; - __kmpc_impl_unpack(val, lo, hi); + __kmpc_impl::Bits::unpack(val, lo, hi); hi = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, hi, delta, size); lo = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, lo, delta, size); - return __kmpc_impl_pack(lo, hi); + return __kmpc_impl::Bits::pack(lo, hi); } INLINE static void gpu_regular_warp_reduce(void *reduce_data, diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_api.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_api.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_api.h @@ -0,0 +1,49 @@ +//===--- target_api.h - OpenMP GPU target abstraction interface --- c++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// CRTP style static interface for target specific functions +// +//===----------------------------------------------------------------------===// + +#ifndef _TARGET_API_H_ +#define _TARGET_API_H_ + +#include <stdint.h> + +#include "option.h" + +namespace __kmpc_impl { + +// nvcc requires this to be <typename T>. Fails to compile <class T>. +template <typename T> class Api { +public: + INLINE static uint64_t pack(uint32_t lo, uint32_t hi) { + return T::packImpl(lo, hi); + } + INLINE static void unpack(uint64_t val, uint32_t &lo, uint32_t &hi) { + T::unpackImpl(val, lo, hi); + } + +private: + INLINE static uint64_t packImpl(uint32_t lo, uint32_t hi); + INLINE static void unpackImpl(uint64_t, uint32_t &, uint32_t &) = delete; + +private: + friend T; + Api() = delete; +}; + +// Default implementations +template <typename T> +INLINE uint64_t Api<T>::packImpl(uint32_t lo, uint32_t hi) { + return (((uint64_t)hi) << 32u) | (uint64_t)lo; +} + +} // namespace __kmpc_impl + +#endif diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h --- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h @@ -1,4 +1,4 @@ -//===------------ target_impl.h - NVPTX OpenMP GPU options ------- CUDA -*-===// +//===------- target_impl.h - OpenMP GPU target abstraction ------- CUDA -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -14,20 +14,28 @@ #include +#include "target_api.h" #include "option.h" -INLINE void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi) { - asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val)); -} - -INLINE uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi) { - uint64_t val; - asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi)); - return val; -} - typedef uint32_t __kmpc_impl_lanemask_t; +namespace __kmpc_impl { +class Bits : public Api<Bits> { + friend class Api<Bits>; + +private: + INLINE static uint64_t packImpl(uint32_t lo, uint32_t hi) { + uint64_t val; + asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi)); + return val; + } + + INLINE static void unpackImpl(uint64_t val, uint32_t &lo, uint32_t &hi) { + asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val)); + } +}; +} // namespace __kmpc_impl + INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() { __kmpc_impl_lanemask_t res; asm("mov.u32 %0, %%lanemask_lt;" : "=r"(res));