diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt --- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt @@ -76,6 +76,7 @@ ${devicertl_base_directory}/common/omptarget.h ${devicertl_base_directory}/common/omptargeti.h ${devicertl_base_directory}/common/state-queue.h + ${devicertl_base_directory}/common/target_atomic.h ${devicertl_base_directory}/common/state-queuei.h ${devicertl_base_directory}/common/support.h) diff --git a/openmp/libomptarget/deviceRTLs/common/omptargeti.h b/openmp/libomptarget/deviceRTLs/common/omptargeti.h --- a/openmp/libomptarget/deviceRTLs/common/omptargeti.h +++ b/openmp/libomptarget/deviceRTLs/common/omptargeti.h @@ -11,6 +11,8 @@ // //===----------------------------------------------------------------------===// +#include "common/target_atomic.h" + //////////////////////////////////////////////////////////////////////////////// // Task Descriptor //////////////////////////////////////////////////////////////////////////////// @@ -207,7 +209,7 @@ ASSERT0(LT_FUSSY, usedMemIdx < OMP_STATE_COUNT, "MemIdx is too big or uninitialized."); MemDataTy &MD = MemData[usedSlotIdx]; - atomicExch((unsigned *)&MD.keys[usedMemIdx], 0); + __kmpc_atomic_exchange((unsigned *)&MD.keys[usedMemIdx], 0u); } INLINE const void *omptarget_nvptx_SimpleMemoryManager::Acquire(const void *buf, @@ -217,7 +219,7 @@ const unsigned sm = usedSlotIdx; MemDataTy &MD = MemData[sm]; unsigned i = hash(GetBlockIdInKernel()); - while (atomicCAS((unsigned *)&MD.keys[i], 0, 1) != 0) { + while (__kmpc_atomic_cas((unsigned *)&MD.keys[i], 0u, 1u) != 0) { i = hash(i + 1); } usedSlotIdx = sm; diff --git a/openmp/libomptarget/deviceRTLs/common/src/libcall.cu b/openmp/libomptarget/deviceRTLs/common/src/libcall.cu --- a/openmp/libomptarget/deviceRTLs/common/src/libcall.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/libcall.cu @@ -12,6 +12,7 @@ 
//===----------------------------------------------------------------------===// #include "common/omptarget.h" +#include "common/target_atomic.h" #include "target_impl.h" EXTERN double omp_get_wtick(void) { diff --git a/openmp/libomptarget/deviceRTLs/common/src/loop.cu b/openmp/libomptarget/deviceRTLs/common/src/loop.cu --- a/openmp/libomptarget/deviceRTLs/common/src/loop.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/loop.cu @@ -14,6 +14,7 @@ #include "common/omptarget.h" #include "target_impl.h" +#include "common/target_atomic.h" //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// @@ -397,9 +398,9 @@ unsigned int rank = __kmpc_impl_popc(active & lane_mask_lt); uint64_t warp_res; if (rank == 0) { - warp_res = atomicAdd( + warp_res = __kmpc_atomic_add( (unsigned long long *)&omptarget_nvptx_threadPrivateContext->Cnt(), - change); + (unsigned long long)change); } warp_res = Shuffle(active, warp_res, leader); return warp_res + rank; @@ -792,8 +793,8 @@ // Atomic max of iterations. uint64_t *varArray = (uint64_t *)array; uint64_t elem = varArray[i]; - (void)atomicMax((unsigned long long int *)Buffer, - (unsigned long long int)elem); + (void)__kmpc_atomic_max((unsigned long long int *)Buffer, + (unsigned long long int)elem); // Barrier. syncWorkersInGenericMode(NumThreads); diff --git a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu --- a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "common/omptarget.h" +#include "common/target_atomic.h" #include "target_impl.h" EXTERN @@ -242,7 +243,7 @@ // atomicInc increments 'timestamp' and has a range [0, NumTeams-1]. 
// It resets 'timestamp' back to 0 once the last team increments // this counter. - unsigned val = atomicInc(timestamp, NumTeams - 1); + unsigned val = __kmpc_atomic_inc(timestamp, NumTeams - 1); IsLastTeam = val == NumTeams - 1; } @@ -377,7 +378,7 @@ if (checkSPMDMode(loc) && GetThreadIdInBlock() != 0) return 0; // The master thread of the team actually does the reduction. - while (atomicCAS((uint32_t *)crit, 0, 1)) + while (__kmpc_atomic_cas((uint32_t *)crit, 0u, 1u)) ; return 1; } @@ -386,7 +387,7 @@ __kmpc_nvptx_teams_end_reduce_nowait_simple(kmp_Ident *loc, int32_t global_tid, kmp_CriticalName *crit) { __kmpc_impl_threadfence_system(); - (void)atomicExch((uint32_t *)crit, 0); + (void)__kmpc_atomic_exchange((uint32_t *)crit, 0u); } INLINE static bool isMaster(kmp_Ident *loc, uint32_t ThreadId) { @@ -431,7 +432,7 @@ bool IsMaster = isMaster(loc, ThreadId); while (IsMaster) { // Atomic read - Bound = atomicAdd((uint32_t *)&IterCnt, 0); + Bound = __kmpc_atomic_add((uint32_t *)&IterCnt, 0u); if (TeamId < Bound + num_of_records) break; } @@ -447,7 +448,7 @@ // Increment team counter. // This counter is incremented by all teams in the current // BUFFER_SIZE chunk. - ChunkTeamCount = atomicInc((uint32_t *)&Cnt, num_of_records - 1); + ChunkTeamCount = __kmpc_atomic_inc((uint32_t *)&Cnt, num_of_records - 1u); } // Synchronize if (checkSPMDMode(loc)) @@ -522,7 +523,7 @@ if (IsMaster && ChunkTeamCount == num_of_records - 1) { // Allow SIZE number of teams to proceed writing their // intermediate results to the global buffer. 
- atomicAdd((uint32_t *)&IterCnt, num_of_records); + __kmpc_atomic_add((uint32_t *)&IterCnt, uint32_t(num_of_records)); } return 0; diff --git a/openmp/libomptarget/deviceRTLs/common/state-queuei.h b/openmp/libomptarget/deviceRTLs/common/state-queuei.h --- a/openmp/libomptarget/deviceRTLs/common/state-queuei.h +++ b/openmp/libomptarget/deviceRTLs/common/state-queuei.h @@ -1,4 +1,4 @@ -//===------- state-queue.cu - NVPTX OpenMP GPU State Queue ------- CUDA -*-===// +//===------- state-queuei.h - OpenMP GPU State Queue ------------- CUDA -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -17,15 +17,16 @@ //===----------------------------------------------------------------------===// #include "state-queue.h" +#include "common/target_atomic.h" template INLINE uint32_t omptarget_nvptx_Queue::ENQUEUE_TICKET() { - return atomicAdd((unsigned int *)&tail, 1); + return __kmpc_atomic_add((unsigned int *)&tail, 1u); } template INLINE uint32_t omptarget_nvptx_Queue::DEQUEUE_TICKET() { - return atomicAdd((unsigned int *)&head, 1); + return __kmpc_atomic_add((unsigned int *)&head, 1u); } template @@ -37,28 +38,28 @@ template INLINE bool omptarget_nvptx_Queue::IsServing(uint32_t slot, uint32_t id) { - return atomicAdd((unsigned int *)&ids[slot], 0) == id; + return __kmpc_atomic_add((unsigned int *)&ids[slot], 0u) == id; } template INLINE void omptarget_nvptx_Queue::PushElement(uint32_t slot, ElementType *element) { - atomicExch((unsigned long long *)&elementQueue[slot], - (unsigned long long)element); + __kmpc_atomic_exchange((unsigned long long *)&elementQueue[slot], + (unsigned long long)element); } template INLINE ElementType * omptarget_nvptx_Queue::PopElement(uint32_t slot) { - return (ElementType *)atomicAdd((unsigned long long *)&elementQueue[slot], - (unsigned long long)0); + return (ElementType *)__kmpc_atomic_add( + (unsigned long long *)&elementQueue[slot], 
(unsigned long long)0); } template INLINE void omptarget_nvptx_Queue::DoneServing(uint32_t slot, uint32_t id) { - atomicExch((unsigned int *)&ids[slot], (id + 1) % MAX_ID); + __kmpc_atomic_exchange((unsigned int *)&ids[slot], (id + 1) % MAX_ID); } template diff --git a/openmp/libomptarget/deviceRTLs/common/target_atomic.h b/openmp/libomptarget/deviceRTLs/common/target_atomic.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/deviceRTLs/common/target_atomic.h @@ -0,0 +1,43 @@ +//===---- target_atomic.h - OpenMP GPU target atomic functions ---- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Declarations of atomic functions provided by each target +// +//===----------------------------------------------------------------------===// + +#ifndef OMPTARGET_TARGET_ATOMIC_H +#define OMPTARGET_TARGET_ATOMIC_H + +#include "target_impl.h" + +// Forwards to the target-provided atomicAdd and returns its result. +template <typename T> INLINE T __kmpc_atomic_add(T *address, T val) { + return atomicAdd(address, val); +} + +// Forwards to the target-provided atomicInc and returns its result. +template <typename T> INLINE T __kmpc_atomic_inc(T *address, T val) { + return atomicInc(address, val); +} + +// Forwards to the target-provided atomicMax and returns its result. +template <typename T> INLINE T __kmpc_atomic_max(T *address, T val) { + return atomicMax(address, val); +} + +// Forwards to the target-provided atomicExch and returns its result. +template <typename T> INLINE T __kmpc_atomic_exchange(T *address, T val) { + return atomicExch(address, val); +} + +// Forwards to the target-provided atomicCAS and returns its result. +template <typename T> INLINE T __kmpc_atomic_cas(T *address, T compare, T val) { + return atomicCAS(address, compare, val); +} + +#endif diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu --- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu @@ -12,10 +12,11 @@ #include "target_impl.h" #include "common/debug.h" +#include "common/target_atomic.h" 
#define __OMP_SPIN 1000 -#define UNSET 0 -#define SET 1 +#define UNSET 0u +#define SET 1u EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock) { omp_unset_lock(lock); @@ -30,7 +31,7 @@ // (old == compare ? val : old) // TODO: not sure spinning is a good idea here.. - while (atomicCAS(lock, UNSET, SET) != UNSET) { + while (__kmpc_atomic_cas(lock, UNSET, SET) != UNSET) { clock_t start = clock(); clock_t now; for (;;) { @@ -44,7 +45,7 @@ } EXTERN void __kmpc_impl_unset_lock(omp_lock_t *lock) { - (void)atomicExch(lock, UNSET); + (void)__kmpc_atomic_exchange(lock, UNSET); } EXTERN int __kmpc_impl_test_lock(omp_lock_t *lock) {