Thread the source-location descriptor (IdentTy *) through the DeviceRTL state
API: enterDataEnvironment, DateEnvironmentRAII, lookup32, lookupForModify32Impl,
the Value / PtrValue / ValueRAII helpers, and their callers in Parallelism.cpp
and Tasking.cpp. Implicit ICV accesses through Value and PtrValue pass nullptr.

Review note: the serialized path in Parallelism.cpp keeps its explicit
state::exitDataEnvironment() call, so it is paired with a plain
state::enterDataEnvironment(ident). Constructing a DateEnvironmentRAII there
would call exitDataEnvironment() a second time from its destructor.

diff --git a/openmp/libomptarget/DeviceRTL/include/State.h b/openmp/libomptarget/DeviceRTL/include/State.h --- a/openmp/libomptarget/DeviceRTL/include/State.h +++ b/openmp/libomptarget/DeviceRTL/include/State.h @@ -40,56 +40,61 @@ }; /// TODO -void enterDataEnvironment(); +void enterDataEnvironment(IdentTy *Ident); /// TODO void exitDataEnvironment(); /// TODO struct DateEnvironmentRAII { - DateEnvironmentRAII() { enterDataEnvironment(); } + DateEnvironmentRAII(IdentTy *Ident) { enterDataEnvironment(Ident); } ~DateEnvironmentRAII() { exitDataEnvironment(); } }; /// TODO void resetStateForThread(uint32_t TId); -uint32_t &lookup32(ValueKind VK, bool IsReadonly); +uint32_t &lookup32(ValueKind VK, bool IsReadonly, IdentTy *Ident); void *&lookupPtr(ValueKind VK, bool IsReadonly); /// A class without actual state used to provide a nice interface to lookup and /// update ICV values we can declare in global scope. template struct Value { __attribute__((flatten, always_inline)) operator Ty() { - return lookup(/* IsReadonly */ true); + return lookup(/* IsReadonly */ true, /* IdentTy */ nullptr); } __attribute__((flatten, always_inline)) Value &operator=(const Ty &Other) { - set(Other); + set(Other, /* IdentTy */ nullptr); return *this; } __attribute__((flatten, always_inline)) Value &operator++() { - inc(1); + inc(1, /* IdentTy */ nullptr); return *this; } __attribute__((flatten, always_inline)) Value &operator--() { - inc(-1); + inc(-1, /* IdentTy */ nullptr); return *this; } private: - Ty &lookup(bool IsReadonly) { - Ty &t = lookup32(Kind, IsReadonly); + __attribute__((flatten, always_inline)) Ty &lookup(bool IsReadonly, + IdentTy *Ident) { + Ty &t = lookup32(Kind, IsReadonly, Ident); return t; } - Ty &inc(int UpdateVal) { - return (lookup(/* IsReadonly */ false) += UpdateVal); + __attribute__((flatten, always_inline)) Ty &inc(int UpdateVal, + IdentTy *Ident) { + return (lookup(/* IsReadonly */ false, Ident) += UpdateVal); } - Ty &set(Ty UpdateVal) { return (lookup(/* 
IsReadonly */ false) = UpdateVal); } + __attribute__((flatten, always_inline)) Ty &set(Ty UpdateVal, + IdentTy *Ident) { + return (lookup(/* IsReadonly */ false, Ident) = UpdateVal); + } template friend struct ValueRAII; }; @@ -99,7 +104,7 @@ /// we can declare in global scope. template struct PtrValue { __attribute__((flatten, always_inline)) operator Ty() { - return lookup(/* IsReadonly */ true); + return lookup(/* IsReadonly */ true, /* IdentTy */ nullptr); } __attribute__((flatten, always_inline)) PtrValue &operator=(const Ty Other) { @@ -108,17 +113,19 @@ } private: - Ty &lookup(bool IsReadonly) { return lookupPtr(Kind, IsReadonly); } + Ty &lookup(bool IsReadonly, IdentTy *) { return lookupPtr(Kind, IsReadonly); } - Ty &set(Ty UpdateVal) { return (lookup(/* IsReadonly */ false) = UpdateVal); } + Ty &set(Ty UpdateVal) { + return (lookup(/* IsReadonly */ false, /* IdentTy */ nullptr) = UpdateVal); + } template friend struct ValueRAII; }; template struct ValueRAII { - ValueRAII(VTy &V, Ty NewValue, Ty OldValue, bool Active) - : Ptr(Active ? V.lookup(/* IsReadonly */ false) : Val), Val(OldValue), - Active(Active) { + ValueRAII(VTy &V, Ty NewValue, Ty OldValue, bool Active, IdentTy *Ident) + : Ptr(Active ? V.lookup(/* IsReadonly */ false, Ident) : Val), + Val(OldValue), Active(Active) { if (!Active) return; ASSERT(Ptr == OldValue && "ValueRAII initialization with wrong old value!"); diff --git a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp --- a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp @@ -87,7 +87,7 @@ uint32_t TId = mapping::getThreadIdInBlock(); // Handle the serialized case first, same for SPMD/non-SPMD. 
if (OMP_UNLIKELY(!if_expr || icv::Level)) { - state::enterDataEnvironment(); + state::enterDataEnvironment(ident); ++icv::Level; invokeMicrotask(TId, 0, fn, args, nargs); state::exitDataEnvironment(); @@ -104,9 +104,10 @@ // last or the other updates will cause a thread specific state to be // created. state::ValueRAII ParallelTeamSizeRAII(state::ParallelTeamSize, NumThreads, - 1u, TId == 0); - state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, TId == 0); - state::ValueRAII LevelRAII(icv::Level, 1u, 0u, TId == 0); + 1u, TId == 0, ident); + state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, TId == 0, + ident); + state::ValueRAII LevelRAII(icv::Level, 1u, 0u, TId == 0, ident); // Synchronize all threads after the main thread (TId == 0) set up the // team state properly. @@ -142,7 +143,7 @@ bool IsActiveParallelRegion = NumThreads > 1; if (!IsActiveParallelRegion) { - state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true); + state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true, ident); invokeMicrotask(TId, 0, fn, args, nargs); return; } @@ -160,11 +161,11 @@ // last or the other updates will cause a thread specific state to be // created. state::ValueRAII ParallelTeamSizeRAII(state::ParallelTeamSize, NumThreads, - 1u, true); + 1u, true, ident); state::ValueRAII ParallelRegionFnRAII(state::ParallelRegionFn, wrapper_fn, - (void *)nullptr, true); - state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, true); - state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true); + (void *)nullptr, true, ident); + state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, true, ident); + state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true, ident); // Master signals work to activate workers. 
synchronize::threads(); diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -281,7 +281,7 @@ ThreadStateTy *ThreadStates[mapping::MaxThreadsPerTeam]; #pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc) -uint32_t &lookupForModify32Impl(uint32_t ICVStateTy::*Var) { +uint32_t &lookupForModify32Impl(uint32_t ICVStateTy::*Var, IdentTy *Ident) { if (OMP_LIKELY(TeamState.ICVState.LevelVar == 0)) return TeamState.ICVState.*Var; uint32_t TId = mapping::getThreadIdInBlock(); @@ -322,32 +322,32 @@ } // namespace -uint32_t &state::lookup32(ValueKind Kind, bool IsReadonly) { +uint32_t &state::lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident) { switch (Kind) { case state::VK_NThreads: if (IsReadonly) return lookup32Impl(&ICVStateTy::NThreadsVar); - return lookupForModify32Impl(&ICVStateTy::NThreadsVar); + return lookupForModify32Impl(&ICVStateTy::NThreadsVar, Ident); case state::VK_Level: if (IsReadonly) return lookup32Impl(&ICVStateTy::LevelVar); - return lookupForModify32Impl(&ICVStateTy::LevelVar); + return lookupForModify32Impl(&ICVStateTy::LevelVar, Ident); case state::VK_ActiveLevel: if (IsReadonly) return lookup32Impl(&ICVStateTy::ActiveLevelVar); - return lookupForModify32Impl(&ICVStateTy::ActiveLevelVar); + return lookupForModify32Impl(&ICVStateTy::ActiveLevelVar, Ident); case state::VK_MaxActiveLevels: if (IsReadonly) return lookup32Impl(&ICVStateTy::MaxActiveLevelsVar); - return lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar); + return lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar, Ident); case state::VK_RunSched: if (IsReadonly) return lookup32Impl(&ICVStateTy::RunSchedVar); - return lookupForModify32Impl(&ICVStateTy::RunSchedVar); + return lookupForModify32Impl(&ICVStateTy::RunSchedVar, Ident); case state::VK_RunSchedChunk: if (IsReadonly) return 
lookup32Impl(&ICVStateTy::RunSchedChunkVar); - return lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar); + return lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar, Ident); case state::VK_ParallelTeamSize: return TeamState.ParallelTeamSize; default: @@ -376,7 +376,7 @@ ThreadStates[mapping::getThreadIdInBlock()] = nullptr; } -void state::enterDataEnvironment() { +void state::enterDataEnvironment(IdentTy *Ident) { unsigned TId = mapping::getThreadIdInBlock(); ThreadStateTy *NewThreadState = static_cast(__kmpc_alloc_shared(sizeof(ThreadStateTy))); diff --git a/openmp/libomptarget/DeviceRTL/src/Tasking.cpp b/openmp/libomptarget/DeviceRTL/src/Tasking.cpp --- a/openmp/libomptarget/DeviceRTL/src/Tasking.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Tasking.cpp @@ -49,7 +49,7 @@ TaskDescriptorTy *TaskDescriptor, int32_t, void *, int32_t, void *) { FunctionTracingRAII(); - state::DateEnvironmentRAII DERAII; + state::DateEnvironmentRAII DERAII(Loc); TaskDescriptor->TaskFn(0, TaskDescriptor); @@ -60,7 +60,7 @@ void __kmpc_omp_task_begin_if0(IdentTy *Loc, uint32_t TId, TaskDescriptorTy *TaskDescriptor) { FunctionTracingRAII(); - state::enterDataEnvironment(); + state::enterDataEnvironment(Loc); } void __kmpc_omp_task_complete_if0(IdentTy *Loc, uint32_t TId,