diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/hip_atomics.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/hip_atomics.h
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/hip_atomics.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/hip_atomics.h
@@ -11,11 +11,6 @@
 
 #include "target_impl.h"
 
-// inc requires an amdgcn specific intrinsic which is not yet available
-DEVICE unsigned atomicInc(unsigned *address);
-DEVICE unsigned atomicInc(unsigned *address, unsigned max);
-DEVICE int atomicInc(int *address);
-
 namespace {
 
 template <typename T> DEVICE T atomicAdd(T *address, T val) {
@@ -38,5 +33,13 @@
   return compare;
 }
 
+// Forwards address and max to the amdgcn atomic_inc32 builtin with
+// sequentially consistent ordering and an empty scope string, and returns
+// the builtin's result. NOTE(review): presumably mirrors CUDA atomicInc
+// (wraps to 0 past max, returns the old value) -- confirm in builtin docs.
+INLINE uint32_t atomicInc(uint32_t *address, uint32_t max) {
+  return __builtin_amdgcn_atomic_inc32(address, max, __ATOMIC_SEQ_CST, "");
+}
+
 } // namespace
 #endif
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -120,9 +120,21 @@
   __builtin_amdgcn_s_barrier();
 }
 
-DEVICE void __kmpc_impl_threadfence(void);
-DEVICE void __kmpc_impl_threadfence_block(void);
-DEVICE void __kmpc_impl_threadfence_system(void);
+// Sequentially consistent fence at "agent" synchronization scope.
+INLINE void __kmpc_impl_threadfence() {
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
+}
+
+// Sequentially consistent fence at "workgroup" synchronization scope.
+INLINE void __kmpc_impl_threadfence_block() {
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
+}
+
+// Sequentially consistent fence with an empty scope string.
+// NOTE(review): the empty scope is presumably system scope -- confirm.
+INLINE void __kmpc_impl_threadfence_system() {
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
+}
 
 // Calls to the AMDGCN layer (assuming 1D layout)
 INLINE int GetThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); }