[OpenMP][DeviceRTL] Add utils::isSharedMemPtr and use it in the smart stack

Introduce utils::isSharedMemPtr, which forwards to impl::isSharedMemPtr. The
impl versions added here use __builtin_amdgcn_is_shared (AMDGCN),
__nvvm_isspacep_shared (NVPTX), and a base version that returns false.
SharedMemorySmartStackTy::pop now uses this query instead of comparing Ptr
against the bounds of Data. The public entry point takes `const void *` so it
matches the impl signatures; callers passing `void *` are unaffected.

diff --git a/openmp/libomptarget/DeviceRTL/include/Utils.h b/openmp/libomptarget/DeviceRTL/include/Utils.h
--- a/openmp/libomptarget/DeviceRTL/include/Utils.h
+++ b/openmp/libomptarget/DeviceRTL/include/Utils.h
@@ -74,6 +74,9 @@
   return V - V % Align;
 }
 
+/// Return true iff \p Ptr is pointing into shared (local) memory (AS(3)).
+bool isSharedMemPtr(const void *Ptr);
+
 /// A pointer variable that has by design an `undef` value. Use with care.
 __attribute__((loader_uninitialized)) static void *const UndefPtr;
 
diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -14,6 +14,7 @@
 #include "Interface.h"
 #include "Synchronization.h"
 #include "Types.h"
+#include "Utils.h"
 
 using namespace _OMP;
 
@@ -147,7 +148,7 @@
 
 void SharedMemorySmartStackTy::pop(void *Ptr, uint32_t Bytes) {
   uint64_t AlignedBytes = utils::align_up(Bytes, Alignment);
-  if (Ptr >= &Data[0] && Ptr < &Data[state::SharedScratchpadSize]) {
+  if (utils::isSharedMemPtr(Ptr)) {
     int TId = mapping::getThreadIdInBlock();
     Usage[TId] -= AlignedBytes;
     return;
diff --git a/openmp/libomptarget/DeviceRTL/src/Utils.cpp b/openmp/libomptarget/DeviceRTL/src/Utils.cpp
--- a/openmp/libomptarget/DeviceRTL/src/Utils.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Utils.cpp
@@ -32,6 +32,7 @@
 
 namespace impl {
 
+bool isSharedMemPtr(const void *Ptr) { return false; }
 void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits);
 uint64_t Pack(uint32_t LowBits, uint32_t HighBits);
 
@@ -51,6 +52,7 @@
 }
 
 #pragma omp end declare variant
+///}
 
 /// NVPTX Implementation
 ///
@@ -74,6 +76,7 @@
 }
 
 #pragma omp end declare variant
+///}
 
 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane);
 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
@@ -99,6 +102,10 @@
   return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
 }
 
+bool isSharedMemPtr(const void *Ptr) {
+  return __builtin_amdgcn_is_shared(
+      (const __attribute__((address_space(0))) void *)Ptr);
+}
 #pragma omp end declare variant
 ///}
 
@@ -117,7 +124,10 @@
   return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T);
 }
 
+bool isSharedMemPtr(const void *Ptr) { return __nvvm_isspacep_shared(Ptr); }
+
 #pragma omp end declare variant
+///}
 } // namespace impl
 
 uint64_t utils::pack(uint32_t LowBits, uint32_t HighBits) {
@@ -137,6 +147,10 @@
   return impl::shuffleDown(Mask, Var, Delta, Width);
 }
 
+bool utils::isSharedMemPtr(const void *Ptr) {
+  return impl::isSharedMemPtr(Ptr);
+}
+
 extern "C" {
 int32_t __kmpc_shuffle_int32(int32_t Val, int16_t Delta, int16_t SrcLane) {
   FunctionTracingRAII();