diff --git a/openmp/libomptarget/DeviceRTL/include/Mapping.h b/openmp/libomptarget/DeviceRTL/include/Mapping.h --- a/openmp/libomptarget/DeviceRTL/include/Mapping.h +++ b/openmp/libomptarget/DeviceRTL/include/Mapping.h @@ -34,9 +34,19 @@ bool isGenericMode(); /// Return true if the executing thread is the main thread in generic mode. +/// These functions will look up state, which must be OK for the thread and +/// location. See also `isInitialThreadInLevel0` for a stateless alternative for +/// certain situations, e.g. during initialization. bool isMainThreadInGenericMode(); bool isMainThreadInGenericMode(bool IsSPMD); +/// Return true if this thread is the initial thread in parallel level 0. +/// +/// The thread for which this returns true should be used for single-threaded +/// initialization tasks. We pick a special thread to ensure there are no +/// races between the initialization and the first read of initialized state. +bool isInitialThreadInLevel0(bool IsSPMD); + /// Return true if the executing thread has the lowest Id of the active threads /// in the warp. 
bool isLeaderInWarp(); diff --git a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp --- a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp @@ -83,7 +83,7 @@ return -1; } - if (mapping::isMainThreadInGenericMode(IsSPMD)) + if (mapping::isInitialThreadInLevel0(IsSPMD)) return -1; if (UseGenericStateMachine) diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp --- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp @@ -164,20 +164,30 @@ } // namespace impl } // namespace _OMP +static bool isInLastWarp() { + uint32_t MainTId = (mapping::getNumberOfProcessorElements() - 1) & + ~(mapping::getWarpSize() - 1); + return mapping::getThreadIdInBlock() == MainTId; +} + bool mapping::isMainThreadInGenericMode(bool IsSPMD) { if (IsSPMD || icv::Level) return false; // Check if this is the last warp in the block. 
- uint32_t MainTId = (mapping::getNumberOfProcessorElements() - 1) & - ~(mapping::getWarpSize() - 1); - return mapping::getThreadIdInBlock() == MainTId; + return isInLastWarp(); } bool mapping::isMainThreadInGenericMode() { return mapping::isMainThreadInGenericMode(mapping::isSPMDMode()); } +bool mapping::isInitialThreadInLevel0(bool IsSPMD) { + if (IsSPMD) + return mapping::getThreadIdInBlock() == 0; + return isInLastWarp(); +} + bool mapping::isLeaderInWarp() { __kmpc_impl_lanemask_t Active = mapping::activemask(); __kmpc_impl_lanemask_t LaneMaskLT = mapping::lanemaskLT(); @@ -220,7 +230,7 @@ static int SHARED(IsSPMDMode); void mapping::init(bool IsSPMD) { - if (!mapping::getThreadIdInBlock()) + if (mapping::isInitialThreadInLevel0(IsSPMD)) IsSPMDMode = IsSPMD; } diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -366,7 +366,7 @@ void state::init(bool IsSPMD) { SharedMemorySmartStack.init(IsSPMD); - if (!mapping::getThreadIdInBlock()) + if (mapping::isInitialThreadInLevel0(IsSPMD)) TeamState.init(IsSPMD); ThreadStates[mapping::getThreadIdInBlock()] = nullptr;