amdgcn deviceRTL: derive the lane mask type and lane queries from WARPSIZE

target_impl.h
  * Drops the fixed `typedef uint64_t __kmpc_impl_lanemask_t`.
  * Adds detail::UnsignedToType<N>, specialized only for 64 and 32, and
    defines __kmpc_impl_lanemask_t as UnsignedToType<WARPSIZE>::type, i.e.
    uint64_t when WARPSIZE is 64 and uint32_t when it is 32. The primary
    template is declared but never defined, so any other WARPSIZE fails to
    compile. The alias is placed after the WARPSIZE enum it depends on.

target_impl.hip
  * __kmpc_impl_activemask() returns __builtin_amdgcn_read_exec() when
    WARPSIZE is 64 and __builtin_amdgcn_read_exec_lo() otherwise.
  * GetLaneId() keeps the mbcnt_hi(mbcnt_lo(...)) chain when WARPSIZE is 64
    and uses __builtin_amdgcn_mbcnt_lo alone otherwise.
  * Both functions static_assert that WARPSIZE is 64 or 32.

diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -25,8 +25,6 @@
 #define PRId64 "ld"
 #define PRIu64 "lu"
 
-typedef uint64_t __kmpc_impl_lanemask_t;
-
 #define INLINE inline
 #define NOINLINE __attribute__((noinline))
 #define ALIGN(N) __attribute__((aligned(N)))
@@ -51,6 +49,14 @@
 enum { MAX_THREADS_PER_TEAM = getGridValue().GV_Max_WG_Size };
 enum { WARPSIZE = getGridValue().GV_Warp_Size };
 
+namespace detail {
+template <unsigned N> struct UnsignedToType;
+template <> struct UnsignedToType<64u> { using type = uint64_t; };
+template <> struct UnsignedToType<32u> { using type = uint32_t; };
+} // namespace detail
+
+using __kmpc_impl_lanemask_t = detail::UnsignedToType<WARPSIZE>::type;
+
 // Maximum number of omp state objects per SM allocated statically in global
 // memory.
 #define OMP_STATE_COUNT 32
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -49,7 +49,12 @@
 
 // Warp vote function
 EXTERN __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
-  return __builtin_amdgcn_read_exec();
+  static_assert(WARPSIZE == 64 || WARPSIZE == 32, "");
+  if (WARPSIZE == 64) {
+    return __builtin_amdgcn_read_exec();
+  } else {
+    return __builtin_amdgcn_read_exec_lo();
+  }
 }
 
 static void pteam_mem_barrier(uint32_t num_threads, uint32_t *barrier_state) {
@@ -136,7 +141,12 @@
 EXTERN unsigned GetWarpId() { return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE; }
 EXTERN unsigned GetWarpSize() { return WARPSIZE; }
 EXTERN unsigned GetLaneId() {
-  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+  static_assert(WARPSIZE == 64 || WARPSIZE == 32, "");
+  if (WARPSIZE == 64) {
+    return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+  } else {
+    return __builtin_amdgcn_mbcnt_lo(~0u, 0u);
+  }
 }
 
 EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads() {