diff --git a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp --- a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp @@ -595,6 +595,16 @@ void omp_unset_lock(omp_lock_t *Lock) { impl::unsetLock(Lock); } int omp_test_lock(omp_lock_t *Lock) { return impl::testLock(Lock); } + +void ompx_sync_block(int Ordering) { + impl::syncThreadsAligned(atomic::OrderingTy(Ordering)); +} +void ompx_sync_block_acq_rel() { + impl::syncThreadsAligned(atomic::OrderingTy::acq_rel); +} +void ompx_sync_block_divergent(int Ordering) { + impl::syncThreads(atomic::OrderingTy(Ordering)); +} } // extern "C" #pragma omp end declare target diff --git a/openmp/libomptarget/test/api/ompx_sync.c b/openmp/libomptarget/test/api/ompx_sync.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/api/ompx_sync.c @@ -0,0 +1,42 @@ +// RUN: %libomptarget-compile-run-and-check-generic + +#include +#include +#include + +void foo(int device) { + int X; + // clang-format off +#pragma omp target teams map(from: X) device(device) thread_limit(2) num_teams(1) +#pragma omp parallel + // clang-format on + { + int tid = ompx_thread_id_x(); + int bid = ompx_block_id_x(); + if (tid == 1 && bid == 0) { + X = 42; + ompx_sync_block_divergent(3); + } else { + ompx_sync_block_divergent(1); + } + if (tid == 0 && bid == 0) + X++; + ompx_sync_block(ompx_seq_cst); + if (tid == 1 && bid == 0) + X++; + ompx_sync_block_acq_rel(); + if (tid == 0 && bid == 0) + X++; + ompx_sync_block(ompx_release); + if (tid == 0 && bid == 0) + X++; + } + // CHECK: X: 46 + // CHECK: X: 46 + printf("X: %i\n", X); +} + +int main() { + foo(omp_get_default_device()); + foo(omp_get_initial_device()); +} diff --git a/openmp/libomptarget/test/api/ompx_sync.cpp b/openmp/libomptarget/test/api/ompx_sync.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/api/ompx_sync.cpp @@ -0,0 +1,42 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +#include +#include +#include + +void foo(int device) { + int X; + // clang-format off +#pragma omp target teams map(from: X) device(device) thread_limit(2) num_teams(1) +#pragma omp parallel + // clang-format on + { + int tid = ompx::thread_id_x(); + int bid = ompx::block_id_x(); + if (tid == 1 && bid == 0) { + X = 42; + ompx::sync_block_divergent(3); + } else { + ompx::sync_block_divergent(); + } + if (tid == 0 && bid == 0) + X++; + ompx::sync_block(ompx::seq_cst); + if (tid == 1 && bid == 0) + X++; + ompx::sync_block(); + if (tid == 0 && bid == 0) + X++; + ompx_sync_block(ompx_release); + if (tid == 0 && bid == 0) + X++; + } + // CHECK: X: 46 + // CHECK: X: 46 + printf("X: %i\n", X); +} + +int main() { + foo(omp_get_default_device()); + foo(omp_get_initial_device()); +} diff --git a/openmp/runtime/src/include/ompx.h.var b/openmp/runtime/src/include/ompx.h.var --- a/openmp/runtime/src/include/ompx.h.var +++ b/openmp/runtime/src/include/ompx.h.var @@ -36,6 +36,14 @@ extern "C" { #endif +enum { + ompx_relaxed = __ATOMIC_RELAXED, + ompx_aquire = __ATOMIC_ACQUIRE, + ompx_release = __ATOMIC_RELEASE, + ompx_acq_rel = __ATOMIC_ACQ_REL, + ompx_seq_cst = __ATOMIC_SEQ_CST, +}; + enum { ompx_dim_x = 0, ompx_dim_y = 1, @@ -56,8 +64,33 @@ #undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C ///} +/// ompx_{sync_block}_{,divergent} +///{ +#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(RETTY, NAME, ARGS, BODY) \ + static inline RETTY ompx_##NAME(ARGS) { BODY; } + +_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block, int Ordering, + _Pragma("omp barrier")); +_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_acq_rel, void, + ompx_sync_block(ompx_acq_rel)); +_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_divergent, int Ordering, + ompx_sync_block(Ordering)); +#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C +///} + #pragma omp end declare variant +/// ompx_{sync_block}_{,divergent} +///{ +#define _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(RETTY, NAME, ARGS) \ + RETTY ompx_##NAME(ARGS); + +_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block, int Ordering); +_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_acq_rel, void); +_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_divergent, int Ordering); +#undef _TGT_KERNEL_LANGUAGE_DECL_SYNC_C +///} + /// ompx_{thread,block}_{id,dim}_{x,y,z} ///{ #define _TGT_KERNEL_LANGUAGE_DECL_GRID_C(NAME) \ @@ -87,6 +120,14 @@ dim_z = ompx_dim_z, }; +enum { + relaxed = ompx_relaxed , + aquire = ompx_aquire, + release = ompx_release, + acc_rel = ompx_acq_rel, + seq_cst = ompx_seq_cst, +}; + /// ompx::{thread,block}_{id,dim}_{,x,y,z} ///{ #define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(NAME) \ @@ -102,6 +143,20 @@ #undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX ///} +/// ompx_{sync_block}_{,divergent} +///{ +#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(RETTY, NAME, ARGS, CALL_ARGS) \ + static inline RETTY NAME(ARGS) { \ + return ompx_##NAME(CALL_ARGS); \ + } + +_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block, int Ordering = acc_rel, + Ordering); +_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block_divergent, + int Ordering = acc_rel, Ordering); +#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX +///} + } // namespace ompx #endif