diff --git a/openmp/libomptarget/DeviceRTL/include/Utils.h b/openmp/libomptarget/DeviceRTL/include/Utils.h --- a/openmp/libomptarget/DeviceRTL/include/Utils.h +++ b/openmp/libomptarget/DeviceRTL/include/Utils.h @@ -79,6 +79,9 @@ return *((DstTy *)(&V)); } +/// Return true iff \p Ptr is pointing into shared (local) memory (AS(3)). +bool isSharedMemPtr(void *Ptr); + /// A pointer variable that has by design an `undef` value. Use with care. __attribute__((loader_uninitialized)) static void *const UndefPtr; diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -15,6 +15,7 @@ #include "Mapping.h" #include "Synchronization.h" #include "Types.h" +#include "Utils.h" using namespace _OMP; @@ -148,7 +149,7 @@ void SharedMemorySmartStackTy::pop(void *Ptr, uint32_t Bytes) { uint64_t AlignedBytes = utils::align_up(Bytes, Alignment); - if (Ptr >= &Data[0] && Ptr < &Data[state::SharedScratchpadSize]) { + if (utils::isSharedMemPtr(Ptr)) { int TId = mapping::getThreadIdInBlock(); Usage[TId] -= AlignedBytes; return; diff --git a/openmp/libomptarget/DeviceRTL/src/Utils.cpp b/openmp/libomptarget/DeviceRTL/src/Utils.cpp --- a/openmp/libomptarget/DeviceRTL/src/Utils.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Utils.cpp @@ -99,6 +99,7 @@ return __builtin_amdgcn_ds_bpermute(Index << 2, Var); } +bool isSharedMemPtr(void *Ptr) { return __builtin_amdgcn_is_shared(Ptr); } #pragma omp end declare variant ///} @@ -117,6 +118,8 @@ return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T); } +bool isSharedMemPtr(void *Ptr) { return __nvvm_isspacep_shared(Ptr); } + #pragma omp end declare variant } // namespace impl @@ -137,6 +140,8 @@ return impl::shuffleDown(Mask, Var, Delta, Width); } +bool utils::isSharedMemPtr(void *Ptr) { return impl::isSharedMemPtr(Ptr); } + extern "C" { int32_t __kmpc_shuffle_int32(int32_t Val, int16_t Delta, int16_t SrcLane) { FunctionTracingRAII();