diff --git a/openmp/libomptarget/DeviceRTL/include/Synchronization.h b/openmp/libomptarget/DeviceRTL/include/Synchronization.h --- a/openmp/libomptarget/DeviceRTL/include/Synchronization.h +++ b/openmp/libomptarget/DeviceRTL/include/Synchronization.h @@ -74,6 +74,10 @@ /// Atomically add \p V to \p *Addr with \p Ordering semantics. uint64_t add(uint64_t *Addr, uint64_t V, int Ordering); +/// Atomically write \p V to \p *Addr with \p Ordering semantics and return the +/// old value of \p *Addr. +uint32_t exchange(uint32_t *Addr, uint32_t V, int Ordering); + } // namespace atomic } // namespace _OMP diff --git a/openmp/libomptarget/DeviceRTL/src/Debug.cpp b/openmp/libomptarget/DeviceRTL/src/Debug.cpp --- a/openmp/libomptarget/DeviceRTL/src/Debug.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Debug.cpp @@ -13,16 +13,35 @@ #include "Debug.h" #include "Configuration.h" #include "Mapping.h" +#include "Synchronization.h" #include "Types.h" +#include "Utils.h" using namespace _OMP; #pragma omp declare target +/// Hash of the last failed assertion; lets __assert_assume skip duplicate +/// prints. Left uninitialized on purpose so it costs nothing when assertions +/// are disabled or we don't validate any; the chance that it initially holds +/// the hash of a failing assertion is negligible. Comparing against the undefined +/// value and branching on it is technically UB, but the atomic (builtin) access +/// used to read it does not expose the undef value to the compiler yet. Hardware +/// will not exploit the UB, so we are fine as long as LLVM won't look through +/// __atomic_exchange; hopefully source-level `freeze` intrinsics exist by then. 
+static uint32_t AssertionFlag [[clang::loader_uninitialized]]; + extern "C" { void __assert_assume(bool cond, const char *exp, const char *file, int line) { - if (!cond && config::isDebugMode(config::DebugKind::Assertion)) { - PRINTF("ASSERTION failed: %s at %s, line %d\n", exp, file, line); + if (config::isDebugMode(config::DebugKind::Assertion) && !cond) { + uint32_t exp_low, exp_high; + utils::unpack(uint64_t(exp), exp_low, exp_high); + uint32_t file_low, file_high; + utils::unpack(uint64_t(file), file_low, file_high); + uint32_t hash = (exp_low << 1) ^ (exp_high << 3) ^ (file_low << 7) ^ + (file_high << 11) ^ (line); + if (hash != atomic::exchange(&AssertionFlag, hash, __ATOMIC_SEQ_CST)) + PRINTF("ASSERTION failed: %s at %s, line %d\n", exp, file, line); __builtin_trap(); } diff --git a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp --- a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp @@ -262,6 +262,10 @@ return impl::atomicAdd(Addr, V, Ordering); } +uint32_t atomic::exchange(uint32_t *Addr, uint32_t V, int Ordering) { + return impl::atomicExchange(Addr, V, Ordering); +} + extern "C" { void __kmpc_ordered(IdentTy *Loc, int32_t TId) {}