Patch summary (text before the first "diff --git" line is not part of any hunk):
  * target_impl.h: __kmpc_impl_lanemask_t is no longer a fixed uint64_t; it is
    looked up from WARPSIZE via detail::UnsignedToType (64 -> uint64_t,
    32 -> uint32_t). The alias moves below the WARPSIZE enum it depends on.
  * target_impl.hip: __kmpc_impl_activemask() and GetLaneId() choose their
    builtins by __AMDGCN_WAVEFRONT_SIZE (64 or 32) and stop the build with
    #error for any other value.

diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -25,8 +25,6 @@
 #define PRId64 "ld"
 #define PRIu64 "lu"
 
-typedef uint64_t __kmpc_impl_lanemask_t;
-
 #define INLINE inline
 #define NOINLINE __attribute__((noinline))
 #define ALIGN(N) __attribute__((aligned(N)))
@@ -47,6 +45,14 @@
 enum { MAX_THREADS_PER_TEAM = getGridValue().GV_Max_WG_Size };
 enum { WARPSIZE = getGridValue().GV_Warp_Size };
 
+namespace detail {
+template <unsigned N> struct UnsignedToType;
+template <> struct UnsignedToType<64u> { using type = uint64_t; };
+template <> struct UnsignedToType<32u> { using type = uint32_t; };
+} // namespace detail
+
+using __kmpc_impl_lanemask_t = detail::UnsignedToType<WARPSIZE>::type;
+
 // Maximum number of omp state objects per SM allocated statically in global
 // memory.
 #define OMP_STATE_COUNT 32
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -49,7 +49,13 @@
 
 // Warp vote function
 EXTERN __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
+#if __AMDGCN_WAVEFRONT_SIZE == 64
   return __builtin_amdgcn_read_exec();
+#elif __AMDGCN_WAVEFRONT_SIZE == 32
+  return __builtin_amdgcn_read_exec_lo();
+#else
+  #error "Unexpected WAVEFRONT_SIZE"
+#endif
 }
 
 static void pteam_mem_barrier(uint32_t num_threads, uint32_t *barrier_state) {
@@ -136,7 +142,13 @@
 EXTERN unsigned GetWarpId() { return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE; }
 EXTERN unsigned GetWarpSize() { return WARPSIZE; }
 EXTERN unsigned GetLaneId() {
-  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+#if __AMDGCN_WAVEFRONT_SIZE == 64
+  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+#elif __AMDGCN_WAVEFRONT_SIZE == 32
+  return __builtin_amdgcn_mbcnt_lo(~0u, 0u);
+#else
+  #error "Unexpected WAVEFRONT_SIZE"
+#endif
 }
 
 EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads() {