diff --git a/openmp/libomptarget/deviceRTLs/common/src/loop.cu b/openmp/libomptarget/deviceRTLs/common/src/loop.cu --- a/openmp/libomptarget/deviceRTLs/common/src/loop.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/loop.cu @@ -14,6 +14,7 @@ #include "common/omptarget.h" #include "target_impl.h" +#include "common/target_atomic.h" //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// @@ -397,7 +398,7 @@ unsigned int rank = __kmpc_impl_popc(active & lane_mask_lt); uint64_t warp_res; if (rank == 0) { - warp_res = atomicAdd( + warp_res = kmpcAtomicAdd( (unsigned long long *)&omptarget_nvptx_threadPrivateContext->Cnt(), change); } diff --git a/openmp/libomptarget/deviceRTLs/common/state-queuei.h b/openmp/libomptarget/deviceRTLs/common/state-queuei.h --- a/openmp/libomptarget/deviceRTLs/common/state-queuei.h +++ b/openmp/libomptarget/deviceRTLs/common/state-queuei.h @@ -17,15 +17,16 @@ //===----------------------------------------------------------------------===// #include "state-queue.h" +#include "common/target_atomic.h" template INLINE uint32_t omptarget_nvptx_Queue::ENQUEUE_TICKET() { - return atomicAdd((unsigned int *)&tail, 1); + return kmpcAtomicAdd((unsigned int *)&tail, 1); } template INLINE uint32_t omptarget_nvptx_Queue::DEQUEUE_TICKET() { - return atomicAdd((unsigned int *)&head, 1); + return kmpcAtomicAdd((unsigned int *)&head, 1); } template @@ -37,7 +38,7 @@ template INLINE bool omptarget_nvptx_Queue::IsServing(uint32_t slot, uint32_t id) { - return atomicAdd((unsigned int *)&ids[slot], 0) == id; + return kmpcAtomicAdd((unsigned int *)&ids[slot], 0) == id; } template @@ -51,8 +52,8 @@ template INLINE ElementType * omptarget_nvptx_Queue::PopElement(uint32_t slot) { - return (ElementType *)atomicAdd((unsigned long long *)&elementQueue[slot], - (unsigned long long)0); + return (ElementType *)kmpcAtomicAdd((unsigned long long *)&elementQueue[slot], + 
(unsigned long long)0); } template diff --git a/openmp/libomptarget/deviceRTLs/common/target_atomic.h b/openmp/libomptarget/deviceRTLs/common/target_atomic.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/deviceRTLs/common/target_atomic.h @@ -0,0 +1,27 @@ +//===---- target_atomic.h - OpenMP GPU target atomic functions ---- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Declarations of atomic functions provided by each target +// +//===----------------------------------------------------------------------===// + +#ifndef OMPTARGET_TARGET_ATOMIC_H +#define OMPTARGET_TARGET_ATOMIC_H + +#include <stdint.h> + +// Atomically add val to *address and return the value held before the add. +// Device-only entry points; each target supplies the definitions. The 64-bit +// overload uses unsigned long long because that is the type CUDA's atomicAdd +// accepts and the type existing callers cast to. +__device__ uint32_t kmpcAtomicAdd(uint32_t *address, uint32_t val); +__device__ int32_t kmpcAtomicAdd(int32_t *address, int32_t val); +__device__ unsigned long long kmpcAtomicAdd(unsigned long long *address, + unsigned long long val); + +#endif diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt --- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt @@ -59,6 +59,7 @@ ${devicertl_common_directory}/src/omptarget.cu ${devicertl_common_directory}/src/parallel.cu src/reduction.cu + src/target_atomic.cu ${devicertl_common_directory}/src/support.cu ${devicertl_common_directory}/src/sync.cu ${devicertl_common_directory}/src/task.cu diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu --- a/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "common/omptarget.h" +#include "common/target_atomic.h" // Timer precision is 
1ns #define TIMER_PRECISION ((double)1E-9) @@ -382,7 +383,7 @@ EXTERN int omp_test_lock(omp_lock_t *lock) { // int atomicCAS(int* address, int compare, int val); // (old == compare ? val : old) - int ret = atomicAdd(lock, 0); + int ret = kmpcAtomicAdd(lock, 0); PRINT(LD_IO, "call omp_test_lock() return %d\n", ret); diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu --- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu @@ -15,6 +15,7 @@ #include #include "common/omptarget.h" +#include "common/target_atomic.h" #include "target_impl.h" EXTERN @@ -435,7 +436,7 @@ bool IsMaster = isMaster(loc, ThreadId); while (IsMaster) { // Atomic read - Bound = atomicAdd((uint32_t *)&IterCnt, 0); + Bound = kmpcAtomicAdd((uint32_t *)&IterCnt, 0); if (TeamId < Bound + num_of_records) break; } @@ -526,7 +527,7 @@ if (IsMaster && ChunkTeamCount == num_of_records - 1) { // Allow SIZE number of teams to proceed writing their // intermediate results to the global buffer. - atomicAdd((uint32_t *)&IterCnt, num_of_records); + kmpcAtomicAdd((uint32_t *)&IterCnt, num_of_records); } return 0; diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_atomic.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/target_atomic.cu new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_atomic.cu @@ -0,0 +1,27 @@ +//===----- target_atomic.cu - NVPTX OpenMP atomic functions ------ CUDA -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Definitions of atomic functions for nvptx +//===----------------------------------------------------------------------===// + +#include "common/target_atomic.h" +#include "target_impl.h" + +// Thin device-side wrappers that forward to CUDA's atomicAdd overloads. +__device__ uint32_t kmpcAtomicAdd(uint32_t *address, uint32_t val) { + return atomicAdd(address, val); +} + +__device__ int32_t kmpcAtomicAdd(int32_t *address, int32_t val) { + return atomicAdd(address, val); +} + +__device__ unsigned long long kmpcAtomicAdd(unsigned long long *address, + unsigned long long val) { + return atomicAdd(address, val); +}