diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h @@ -13,5 +13,6 @@ #define EXTERN extern "C" __attribute__((device)) typedef uint64_t __kmpc_impl_lanemask_t; +typedef uint32_t omp_lock_t; /* arbitrary type of the right length */ #endif diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h @@ -155,6 +155,13 @@ INLINE int GetNumberOfBlocksInKernel() { return __ockl_get_num_groups(0); } INLINE int GetNumberOfThreadsInBlock() { return __ockl_get_local_size(0); } +// Locks +EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock); +EXTERN void __kmpc_impl_destroy_lock(omp_lock_t *lock); +EXTERN void __kmpc_impl_set_lock(omp_lock_t *lock); +EXTERN void __kmpc_impl_unset_lock(omp_lock_t *lock); +EXTERN int __kmpc_impl_test_lock(omp_lock_t *lock); + // DEVICE versions of part of libc extern "C" { DEVICE __attribute__((noreturn)) void diff --git a/openmp/libomptarget/deviceRTLs/interface.h b/openmp/libomptarget/deviceRTLs/interface.h --- a/openmp/libomptarget/deviceRTLs/interface.h +++ b/openmp/libomptarget/deviceRTLs/interface.h @@ -30,7 +30,6 @@ // OpenMP interface //////////////////////////////////////////////////////////////////////////////// -typedef uint32_t omp_lock_t; /* arbitrary type of the right length */ typedef uint64_t omp_nest_lock_t; /* arbitrary type of the right length */ typedef enum omp_sched_t { diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt --- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt @@ -55,6 +55,7 @@ ${devicertl_common_directory}/src/critical.cu src/data_sharing.cu src/libcall.cu + src/target_impl.cu ${devicertl_common_directory}/src/loop.cu ${devicertl_common_directory}/src/omptarget.cu ${devicertl_common_directory}/src/parallel.cu diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu --- a/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu @@ -336,54 +336,30 @@ // locks //////////////////////////////////////////////////////////////////////////////// -#define __OMP_SPIN 1000 -#define UNSET 0 -#define SET 1 - EXTERN void omp_init_lock(omp_lock_t *lock) { - omp_unset_lock(lock); + __kmpc_impl_init_lock(lock); PRINT0(LD_IO, "call omp_init_lock()\n"); } EXTERN void omp_destroy_lock(omp_lock_t *lock) { - omp_unset_lock(lock); + __kmpc_impl_destroy_lock(lock); PRINT0(LD_IO, "call omp_destroy_lock()\n"); } EXTERN void omp_set_lock(omp_lock_t *lock) { - // int atomicCAS(int* address, int compare, int val); - // (old == compare ? val : old) - - // TODO: not sure spinning is a good idea here.. - while (atomicCAS(lock, UNSET, SET) != UNSET) { - clock_t start = clock(); - clock_t now; - for (;;) { - now = clock(); - clock_t cycles = now > start ? now - start : now + (0xffffffff - start); - if (cycles >= __OMP_SPIN * GetBlockIdInKernel()) { - break; - } - } - } // wait for 0 to be the read value - + __kmpc_impl_set_lock(lock); PRINT0(LD_IO, "call omp_set_lock()\n"); } EXTERN void omp_unset_lock(omp_lock_t *lock) { - (void)atomicExch(lock, UNSET); - + __kmpc_impl_unset_lock(lock); PRINT0(LD_IO, "call omp_unset_lock()\n"); } EXTERN int omp_test_lock(omp_lock_t *lock) { - // int atomicCAS(int* address, int compare, int val); - // (old == compare ? val : old) - int ret = atomicAdd(lock, 0); - + int rc = __kmpc_impl_test_lock(lock); PRINT(LD_IO, "call omp_test_lock() return %d\n", ret); - - return ret; + return rc; } // for xlf Fotran diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/nvptx_interface.h b/openmp/libomptarget/deviceRTLs/nvptx/src/nvptx_interface.h --- a/openmp/libomptarget/deviceRTLs/nvptx/src/nvptx_interface.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/nvptx_interface.h @@ -13,5 +13,6 @@ #define EXTERN extern "C" __device__ typedef uint32_t __kmpc_impl_lanemask_t; +typedef uint32_t omp_lock_t; /* arbitrary type of the right length */ #endif diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h --- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h @@ -188,4 +188,11 @@ INLINE int GetNumberOfBlocksInKernel() { return gridDim.x; } INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; } +// Locks +EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock); +EXTERN void __kmpc_impl_destroy_lock(omp_lock_t *lock); +EXTERN void __kmpc_impl_set_lock(omp_lock_t *lock); +EXTERN void __kmpc_impl_unset_lock(omp_lock_t *lock); +EXTERN int __kmpc_impl_test_lock(omp_lock_t *lock); + #endif diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu @@ -0,0 +1,54 @@ +//===---------- target_impl.cu - NVPTX OpenMP GPU options ------- CUDA -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Definitions of target specific functions +// +//===----------------------------------------------------------------------===// + +#include "target_impl.h" +#include "common/debug.h" + +#define __OMP_SPIN 1000 +#define UNSET 0 +#define SET 1 + +EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock) { + omp_unset_lock(lock); +} + +EXTERN void __kmpc_impl_destroy_lock(omp_lock_t *lock) { + omp_unset_lock(lock); +} + +EXTERN void __kmpc_impl_set_lock(omp_lock_t *lock) { + // int atomicCAS(int* address, int compare, int val); + // (old == compare ? val : old) + + // TODO: not sure spinning is a good idea here.. + while (atomicCAS(lock, UNSET, SET) != UNSET) { + clock_t start = clock(); + clock_t now; + for (;;) { + now = clock(); + clock_t cycles = now > start ? now - start : now + (0xffffffff - start); + if (cycles >= __OMP_SPIN * GetBlockIdInKernel()) { + break; + } + } + } // wait for 0 to be the read value +} + +EXTERN void __kmpc_impl_unset_lock(omp_lock_t *lock) { + (void)atomicExch(lock, UNSET); +} + +EXTERN int __kmpc_impl_test_lock(omp_lock_t *lock) { + // int atomicCAS(int* address, int compare, int val); + // (old == compare ? val : old) + return atomicAdd(lock, 0); +}