diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -781,7 +781,7 @@ emitGenericVarsEpilog(CGF); CGBuilderTy &Bld = CGF.Builder; - OMPBuilder.createTargetDeinit(Bld, IsSPMD); + OMPBuilder.createTargetDeinit(Bld); } void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D, diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1461,8 +1461,7 @@ /// Create a runtime call for kmpc_target_deinit /// /// \param Loc The insert and source location description. - /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not. - void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD); + void createTargetDeinit(const LocationDescription &Loc); ///} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -96,6 +96,10 @@ Int64, Int64, Int32Arr3Ty, Int32Arr3Ty, Int32) __OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, false, Int8Ptr) __OMP_STRUCT_TYPE(DependInfo, kmp_dep_info, false, SizeTy, SizeTy, Int8) +__OMP_STRUCT_TYPE(ConfigurationEnvironmentTy, ConfigurationEnvironmentTy, false, + Int8, Int8, Int8) +__OMP_STRUCT_TYPE(KernelEnvironmentTy, KernelEnvironmentTy, false, + ConfigurationEnvironmentTy, IdentPtr, Int16) #undef __OMP_STRUCT_TYPE #undef OMP_STRUCT_TYPE @@ -452,8 +456,8 @@ /* Int */ Int32, /* kmp_task_t */ VoidPtr) /// OpenMP Device runtime functions -__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1) -__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8) +__OMP_RTL(__kmpc_target_init, false, Int32, KernelEnvironmentTyPtr)
+__OMP_RTL(__kmpc_target_deinit, false, Void,) __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) __OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32, VoidPtr, VoidPtr, VoidPtrPtr, SizeTy) @@ -1012,9 +1016,9 @@ ReturnPtrAttrs, ParamAttrs(ReadOnlyPtrAttrs, SExt)) __OMP_RTL_ATTRS(__kmpc_target_init, AttributeSet(), SExt, - ParamAttrs(AttributeSet(), SExt, SExt)) + ParamAttrs(AttributeSet())) __OMP_RTL_ATTRS(__kmpc_target_deinit, AttributeSet(), AttributeSet(), - ParamAttrs(AttributeSet(), SExt)) + ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_parallel_51, AlwaysInlineAttrs, AttributeSet(), ParamAttrs(AttributeSet(), SExt, SExt, SExt, SExt, AttributeSet(), AttributeSet(), AttributeSet(), diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -3878,14 +3878,41 @@ ConstantInt *IsSPMDVal = ConstantInt::getSigned( IntegerType::getInt8Ty(Int8->getContext()), IsSPMD ?
OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); - ConstantInt *UseGenericStateMachine = - ConstantInt::getBool(Int32->getContext(), !IsSPMD); - + ConstantInt *UseGenericStateMachineVal = ConstantInt::getSigned( + IntegerType::getInt8Ty(Int8->getContext()), !IsSPMD); + ConstantInt *MayUseNestedParallelismVal = + ConstantInt::getSigned(IntegerType::getInt8Ty(Int8->getContext()), true); + ConstantInt *DebugIndentionLevelVal = + ConstantInt::getSigned(IntegerType::getInt16Ty(Int8->getContext()), 0); + + Function *Kernel = Builder.GetInsertBlock()->getParent(); Function *Fn = getOrCreateRuntimeFunctionPtr( omp::RuntimeFunction::OMPRTL___kmpc_target_init); - - CallInst *ThreadKind = Builder.CreateCall( - Fn, {Ident, IsSPMDVal, UseGenericStateMachine}); + const DataLayout &DL = Fn->getParent()->getDataLayout(); + + Constant *ConfigurationEnvironmentInitializer = ConstantStruct::get( + ConfigurationEnvironmentTy, { + UseGenericStateMachineVal, + MayUseNestedParallelismVal, + IsSPMDVal, + }); + Constant *KernelEnvironmentInitializer = ConstantStruct::get( + KernelEnvironmentTy, { + ConfigurationEnvironmentInitializer, + Ident, + DebugIndentionLevelVal, + }); + GlobalVariable *KernelEnvironment = new GlobalVariable( + M, KernelEnvironmentTy, /* IsConstant */ true, + GlobalValue::ExternalLinkage, KernelEnvironmentInitializer, + Kernel->getName() + "_kernel_info", + /* InsertBefore */ nullptr, llvm::GlobalValue::NotThreadLocal, + DL.getDefaultGlobalsAddressSpace()); + Constant *KernelEnvironmentCasted = + ConstantExpr::getPointerBitCastOrAddrSpaceCast(KernelEnvironment, + KernelEnvironmentTyPtr); + + CallInst *ThreadKind = Builder.CreateCall(Fn, {KernelEnvironmentCasted}); Value *ExecUserCode = Builder.CreateICmpEQ( ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), @@ -3918,22 +3945,14 @@ return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt()); } -void
OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc, - bool IsSPMD) { +void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc) { if (!updateToLocation(Loc)) return; - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); - ConstantInt *IsSPMDVal = ConstantInt::getSigned( - IntegerType::getInt8Ty(Int8->getContext()), - IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); - Function *Fn = getOrCreateRuntimeFunctionPtr( omp::RuntimeFunction::OMPRTL___kmpc_target_deinit); - Builder.CreateCall(Fn, {Ident, IsSPMDVal}); + Builder.CreateCall(Fn, {}); } void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes( diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -44,6 +44,7 @@ #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -179,6 +180,71 @@ namespace { +/// Return the index of the element of \p StructC that contains byte \p Offset. +template <unsigned Offset> +unsigned getConstantStructOffsetIdx(const DataLayout &DL, + ConstantStruct *StructC) { + StructType *ST = cast<StructType>(StructC->getType()); + return DL.getStructLayout(ST)->getElementContainingOffset(Offset); +} + +/// Return the address of the element at \p Offset in the struct at \p Ptr.
+template <unsigned Offset> +Constant *getConstantStructOffsetElementAddr(const DataLayout &DL, + ConstantStruct *StructC, + Constant *Ptr) { + unsigned Idx = getConstantStructOffsetIdx<Offset>(DL, StructC); + StructType *ST = cast<StructType>(StructC->getType()); + return ConstantExpr::getInBoundsGetElementPtr( + ST, Ptr, + ArrayRef<Constant *>{ + ConstantInt::getNullValue(IntegerType::getInt32Ty(ST->getContext())), + ConstantInt::get(IntegerType::getInt32Ty(ST->getContext()), Idx)}); +} + +/// Return the constant element of \p StructC at \p Offset. +template <unsigned Offset> +Constant *getConstantStructOffsetElement(const DataLayout &DL, + ConstantStruct *StructC) { + return StructC->getAggregateElement( + getConstantStructOffsetIdx<Offset>(DL, StructC)); +} + +/// Return a copy of \p StructC with the element at \p Offset replaced by \p V. +template <unsigned Offset> +static ConstantStruct *setConstantStructOffsetElement(const DataLayout &DL, + ConstantStruct *StructC, + Constant *V) { + unsigned Idxs[1] = {getConstantStructOffsetIdx<Offset>(DL, StructC)}; + Constant *NewStructC = ConstantFoldInsertValueInstruction(StructC, V, Idxs); + assert(NewStructC && "Failed to create constant kernel environment!"); + return cast<ConstantStruct>(NewStructC); +} + +#define GET_KERNEL_ENVIRONMENT_MEMBER(Member, DL, StructC) \ + getConstantStructOffsetElement<offsetof(KernelEnvironmentTy, Member)>(DL, StructC) +#define GET_KERNEL_ENVIRONMENT_MEMBER_ADDR(Member, DL, StructC, Ptr) \ + getConstantStructOffsetElementAddr<offsetof(KernelEnvironmentTy, Member)>(DL, StructC, Ptr) +#define SET_KERNEL_ENVIRONMENT_MEMBER(Member, DL, StructC, V) \ + setConstantStructOffsetElement<offsetof(KernelEnvironmentTy, Member)>(DL, StructC, V) +#define GET_KERNEL_CONFIGURATION_ENVIRONMENT_MEMBER(Member, DL, StructC) \ + getConstantStructOffsetElement<offsetof(ConfigurationEnvironmentTy, Member)>( \ + DL, cast<ConstantStruct>( \ + GET_KERNEL_ENVIRONMENT_MEMBER(Configuration, DL, StructC))) +#define SET_KERNEL_CONFIGURATION_ENVIRONMENT_MEMBER(Member, DL, StructC, V) \ + SET_KERNEL_ENVIRONMENT_MEMBER( \ + Configuration, DL, StructC, \ + setConstantStructOffsetElement<offsetof(ConfigurationEnvironmentTy, Member)>( \ + DL, \ + cast<ConstantStruct>( \ + GET_KERNEL_ENVIRONMENT_MEMBER(Configuration, DL, StructC)), \ + V)) + struct AAHeapToShared; struct AAICVTracker;
@@ -588,6 +654,10 @@ /// one we abort as the kernel is malformed. CallBase *KernelInitCB = nullptr; + /// The constant kernel environment as taken from and passed to + /// __kmpc_target_init. + ConstantStruct *KernelEnvC = nullptr; + /// The __kmpc_target_deinit call in this kernel, if any. If we find more than /// one we abort as the kernel is malformed. CallBase *KernelDeinitCB = nullptr; @@ -692,6 +762,12 @@ "assumptions."); KernelDeinitCB = KIS.KernelDeinitCB; } + if (KIS.KernelEnvC) { + if (KernelEnvC && KernelEnvC != KIS.KernelEnvC) + llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt " + "assumptions."); + KernelEnvC = KIS.KernelEnvC; + } SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker; ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions; ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions; @@ -3400,6 +3476,50 @@ return GuardedInstructions; } + /// Return the IdentTy (ident_t) corresponding to the associated kernel. + Constant *getKernelIdent(ConstantStruct *StructC) { + GlobalVariable *KernelEnvGV = getKernelEnvironementGlobalVariable(); + auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer()); + constexpr const unsigned IdentIdx = 1; + return KernelEnvC->getAggregateElement(IdentIdx); + } + + ConstantStruct *getConfigurationFromKernelEnvC(ConstantStruct *KernelEnvC) { + constexpr const unsigned ConfigurationIdx = 0; + auto *ConfigC = + cast<ConstantStruct>(KernelEnvC->getAggregateElement(ConfigurationIdx)); + return ConfigC; + } + + ConstantInt * + getUseGenericStateMachineFromKernelConfiguration(ConstantStruct *KernelEnvC) { + ConstantStruct *ConfigC = getConfigurationFromKernelEnvC(KernelEnvC); + constexpr const unsigned UseGenericStateMachineIdx = 0; + return cast<ConstantInt>( + ConfigC->getAggregateElement(UseGenericStateMachineIdx)); + } + + ConstantInt *getExecModeFromKernelConfiguration(ConstantStruct *KernelEnvC) { + ConstantStruct *ConfigC = getConfigurationFromKernelEnvC(KernelEnvC); + constexpr const unsigned
ExecModeIdx = 2; + return cast<ConstantInt>(ConfigC->getAggregateElement(ExecModeIdx)); + } + + ConstantInt *getMayUseNestedParallelismFromKernelConfiguration( + ConstantStruct *KernelEnvC) { + ConstantStruct *ConfigC = getConfigurationFromKernelEnvC(KernelEnvC); + constexpr const unsigned MayUseNestedParallelismIdx = 1; + return cast<ConstantInt>( + ConfigC->getAggregateElement(MayUseNestedParallelismIdx)); + } + + GlobalVariable *getKernelEnvironementGlobalVariable() { + constexpr const int InitKernelEnvironmentArgNo = 0; + return cast<GlobalVariable>( + KernelInitCB->getArgOperand(InitKernelEnvironmentArgNo) + ->stripPointerCasts()); + } + /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { // This is a high-level transform that might change the constant arguments @@ -3448,6 +3568,17 @@ ReachingKernelEntries.insert(Fn); IsKernelEntry = true; + GlobalVariable *KernelEnvGV = getKernelEnvironementGlobalVariable(); + KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer()); + auto *ExecModeC = getExecModeFromKernelConfiguration(KernelEnvC); + auto *AssumedExecModeC = ConstantInt::get( + ExecModeC->getType(), + ExecModeC->getZExtValue() | OMP_TGT_EXEC_MODE_GENERIC_SPMD); + + // Next rewrite the kernel configuration to indicate we use SPMD-mode now.
+ if (!DisableOpenMPOptSPMDization) + setKernelConfigurationExecMode(KernelEnvC, AssumedExecModeC); + // For kernels we might need to initialize/finalize the IsSPMD state and // we need to register a simplification callback so that the Attributor // knows the constant arguments to __kmpc_target_init and @@ -3497,23 +3628,18 @@ return Val; }; - constexpr const int InitModeArgNo = 1; - constexpr const int DeinitModeArgNo = 1; - constexpr const int InitUseStateMachineArgNo = 2; - A.registerSimplificationCallback( - IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo), - StateMachineSimplifyCB); - A.registerSimplificationCallback( - IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo), - ModeSimplifyCB); + Attributor::SimplifictionCallbackTy KernelConfigurationSimplifyCB = + [&](const IRPosition &IRP, const AbstractAttribute *AA, + bool &UsedAssumedInformation) -> std::optional<Value *> { + return KernelEnvC; + }; + A.registerSimplificationCallback( - IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo), - ModeSimplifyCB); + IRPosition::value(*KernelEnvGV->getInitializer()), + KernelConfigurationSimplifyCB); // Check if we know we are in SPMD-mode already. - ConstantInt *ModeArg = - dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo)); - if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) + if (ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD) SPMDCompatibilityTracker.indicateOptimisticFixpoint(); // This is a generic region but SPMDization is disabled so stop tracking.
else if (DisableOpenMPOptSPMDization) diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -128,6 +128,7 @@ -nocudalib -nogpulib -nostdinc -fopenmp -fopenmp-cuda-mode -Wno-unknown-cuda-version + -DOMPTARGET_DEVICE_RUNTIME -I${include_directory} -I${devicertl_base_directory}/../include ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL} diff --git a/openmp/libomptarget/DeviceRTL/include/Configuration.h b/openmp/libomptarget/DeviceRTL/include/Configuration.h --- a/openmp/libomptarget/DeviceRTL/include/Configuration.h +++ b/openmp/libomptarget/DeviceRTL/include/Configuration.h @@ -13,11 +13,16 @@ #ifndef OMPTARGET_CONFIGURATION_H #define OMPTARGET_CONFIGURATION_H +#include "Environment.h" #include "Types.h" +#pragma omp begin declare target device_type(nohost) + namespace ompx { namespace config { +extern ConfigurationEnvironmentTy *ConfigurationEnvironment; + enum DebugKind : uint32_t { Assertion = 1U << 0, FunctionTracing = 1U << 1, @@ -51,4 +56,6 @@ } // namespace config } // namespace ompx +#pragma omp end declare target + #endif diff --git a/openmp/libomptarget/DeviceRTL/include/Debug.h b/openmp/libomptarget/DeviceRTL/include/Debug.h --- a/openmp/libomptarget/DeviceRTL/include/Debug.h +++ b/openmp/libomptarget/DeviceRTL/include/Debug.h @@ -50,8 +50,6 @@ struct DebugEntryRAII { DebugEntryRAII(const char *File, const unsigned Line, const char *Function); ~DebugEntryRAII(); - - static void init(); }; #endif diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h --- a/openmp/libomptarget/DeviceRTL/include/Interface.h +++ b/openmp/libomptarget/DeviceRTL/include/Interface.h @@ -214,12 +214,14 @@ /// Kernel /// ///{ +// Forward declaration +struct KernelEnvironmentTy; + int8_t __kmpc_is_spmd_exec_mode(); -int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode, -
bool UseGenericStateMachine); +int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment); -void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode); +void __kmpc_target_deinit(); ///} diff --git a/openmp/libomptarget/DeviceRTL/include/State.h b/openmp/libomptarget/DeviceRTL/include/State.h --- a/openmp/libomptarget/DeviceRTL/include/State.h +++ b/openmp/libomptarget/DeviceRTL/include/State.h @@ -17,6 +17,9 @@ #include "Types.h" #include "Utils.h" +// Forward declaration. +struct KernelEnvironmentTy; + #pragma omp begin declare target device_type(nohost) namespace ompx { @@ -113,7 +116,10 @@ #pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc) /// Initialize the state machinery. Must be called by all threads. -void init(bool IsSPMD); +void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment); + +/// Return the kernel environment associated with the current kernel. +KernelEnvironmentTy &getKernelEnvironment(); /// TODO enum ValueKind { diff --git a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp --- a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "Configuration.h" -#include "DeviceEnvironment.h" +#include "Environment.h" #include "State.h" #include "Types.h" @@ -23,7 +23,6 @@ // defined by CGOpenMPRuntimeGPU extern uint32_t __omp_rtl_debug_kind; extern uint32_t __omp_rtl_assume_no_thread_state; -extern uint32_t __omp_rtl_assume_no_nested_parallelism; // This variable should be visibile to the plugin so we override the default // hidden visibility.
@@ -53,7 +52,7 @@ bool config::mayUseThreadStates() { return !__omp_rtl_assume_no_thread_state; } bool config::mayUseNestedParallelism() { - return !__omp_rtl_assume_no_nested_parallelism; + return state::getKernelEnvironment().Configuration.MayUseNestedParallelism; } #pragma omp end declare target diff --git a/openmp/libomptarget/DeviceRTL/src/Debug.cpp b/openmp/libomptarget/DeviceRTL/src/Debug.cpp --- a/openmp/libomptarget/DeviceRTL/src/Debug.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Debug.cpp @@ -14,6 +14,7 @@ #include "Configuration.h" #include "Interface.h" #include "Mapping.h" +#include "State.h" #include "Types.h" using namespace ompx; @@ -31,15 +32,13 @@ } } -/// Current indentation level for the function trace. Only accessed by thread 0. -__attribute__((loader_uninitialized)) static uint32_t Level; -#pragma omp allocate(Level) allocator(omp_pteam_mem_alloc) - DebugEntryRAII::DebugEntryRAII(const char *File, const unsigned Line, const char *Function) { if (config::isDebugMode(config::DebugKind::FunctionTracing) && mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) { + uint16_t &Level = state::getKernelEnvironment().DebugIndentionLevel; + for (int I = 0; I < Level; ++I) PRINTF("%s", " "); @@ -51,10 +50,10 @@ DebugEntryRAII::~DebugEntryRAII() { if (config::isDebugMode(config::DebugKind::FunctionTracing) && - mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) + mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) { + uint16_t &Level = state::getKernelEnvironment().DebugIndentionLevel; Level--; + } } -void DebugEntryRAII::init() { Level = 0; } - #pragma omp end declare target diff --git a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp --- a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "Debug.h" +#include
"Environment.h" #include "Interface.h" #include "Mapping.h" #include "State.h" @@ -23,11 +24,12 @@ #pragma omp begin declare target device_type(nohost) -static void inititializeRuntime(bool IsSPMD) { +static void inititializeRuntime(bool IsSPMD, + KernelEnvironmentTy &KernelEnvironment) { // Order is important here. synchronize::init(IsSPMD); mapping::init(IsSPMD); - state::init(IsSPMD); + state::init(IsSPMD, KernelEnvironment); } /// Simple generic state machine for worker threads. @@ -67,16 +69,17 @@ /// -/// \param Ident Source location identification, can be NULL. +/// \param KernelEnvironment Kernel configuration and source location info. /// -int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode, - bool UseGenericStateMachine) { +int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment) { FunctionTracingRAII(); - const bool IsSPMD = - Mode & llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD; + ConfigurationEnvironmentTy &Configuration = KernelEnvironment.Configuration; + bool IsSPMD = Configuration.ExecMode & + llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD; + bool UseGenericStateMachine = Configuration.UseGenericStateMachine; if (IsSPMD) { - inititializeRuntime(/* IsSPMD */ true); + inititializeRuntime(/* IsSPMD */ true, KernelEnvironment); synchronize::threadsAligned(); } else { - inititializeRuntime(/* IsSPMD */ false); + inititializeRuntime(/* IsSPMD */ false, KernelEnvironment); // No need to wait since only the main threads will execute user // code and workers will run into a barrier right away. } @@ -104,7 +107,7 @@ // thread's warp, so none of its threads can ever be active worker threads. if (UseGenericStateMachine && mapping::getThreadIdInBlock() < mapping::getBlockSize(IsSPMD)) { - genericStateMachine(Ident); + genericStateMachine(KernelEnvironment.Ident); } else { // Retrieve the work function just to ensure we always call // __kmpc_kernel_parallel even if a custom state machine is used. @@ -128,10 +131,9 @@ /// -/// \param Ident Source location identification, can be NULL. +/// The execution mode is queried from the runtime state, not passed in.
/// -void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode) { +void __kmpc_target_deinit() { FunctionTracingRAII(); - const bool IsSPMD = - Mode & llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD; + bool IsSPMD = mapping::isSPMDMode(); state::assumeInitialState(IsSPMD); if (IsSPMD) return; diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp --- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp @@ -10,6 +10,7 @@ //===----------------------------------------------------------------------===// #include "Mapping.h" +#include "Configuration.h" #include "Interface.h" #include "State.h" #include "Types.h" diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -9,8 +9,8 @@ //===----------------------------------------------------------------------===// #include "State.h" -#include "Configuration.h" #include "Debug.h" +#include "Environment.h" #include "Interface.h" #include "Mapping.h" #include "Synchronization.h" @@ -34,6 +34,9 @@ extern unsigned char DynamicSharedBuffer[] __attribute__((aligned(Alignment))); #pragma omp allocate(DynamicSharedBuffer) allocator(omp_pteam_mem_alloc) +/// The kernel environment passed to the init method by the compiler. +static KernelEnvironmentTy *SHARED(KernelEnvironmentPtr); + namespace { /// Fallback implementations are missing to trigger a link time error.
@@ -241,15 +244,19 @@ } // namespace -void state::init(bool IsSPMD) { +void state::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment) { SharedMemorySmartStack.init(IsSPMD); if (mapping::isInitialThreadInLevel0(IsSPMD)) { TeamState.init(IsSPMD); - DebugEntryRAII::init(); ThreadStates = nullptr; + KernelEnvironmentPtr = &KernelEnvironment; } } +KernelEnvironmentTy &state::getKernelEnvironment() { + return *KernelEnvironmentPtr; +} + void state::enterDataEnvironment(IdentTy *Ident) { ASSERT(config::mayUseThreadStates() && "Thread state modified while explicitly disabled!"); diff --git a/openmp/libomptarget/include/DeviceEnvironment.h b/openmp/libomptarget/include/DeviceEnvironment.h deleted file mode 100644 --- a/openmp/libomptarget/include/DeviceEnvironment.h +++ /dev/null @@ -1,25 +0,0 @@ -//===---- device_environment.h - OpenMP GPU device environment ---- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Global device environment -// -//===----------------------------------------------------------------------===// - -#ifndef _OMPTARGET_DEVICE_ENVIRONMENT_H_ -#define _OMPTARGET_DEVICE_ENVIRONMENT_H_ - -// deviceRTL uses <stdint> and DeviceRTL uses explicit definitions - -struct DeviceEnvironmentTy { - uint32_t DebugKind; - uint32_t NumDevices; - uint32_t DeviceNum; - uint32_t DynamicMemSize; -}; - -#endif diff --git a/openmp/libomptarget/include/Environment.h b/openmp/libomptarget/include/Environment.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/include/Environment.h @@ -0,0 +1,53 @@ +//===------------ Environment.h - OpenMP GPU environments --------- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Environments shared between host and device. +// +//===----------------------------------------------------------------------===// + +#ifndef _OMPTARGET_ENVIRONMENT_H_ +#define _OMPTARGET_ENVIRONMENT_H_ + +#ifdef OMPTARGET_DEVICE_RUNTIME +#include "Types.h" +#else +#include "SourceInfo.h" + +#include <cstdint> + +using IdentTy = ident_t; +#endif + +#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h" + +struct DeviceEnvironmentTy { + uint32_t DebugKind; + uint32_t NumDevices; + uint32_t DeviceNum; + uint32_t DynamicMemSize; +}; + +// NOTE: Please don't change the order of these members as they are used in the +// middle end. Always add the new data member at the end. +struct ConfigurationEnvironmentTy { + uint8_t UseGenericStateMachine; + uint8_t MayUseNestedParallelism; + llvm::omp::OMPTgtExecModeFlags ExecMode; +}; + +// NOTE: Please don't change the order of these members as they are used in the +// middle end. Always add the new data member at the end. +struct KernelEnvironmentTy { + ConfigurationEnvironmentTy Configuration; + IdentTy *Ident; + /// Current indentation level for the function trace. Only accessed by thread + /// 0. + uint16_t DebugIndentionLevel; +}; + +#endif // _OMPTARGET_ENVIRONMENT_H_ diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -21,7 +21,7 @@ #include #include "Debug.h" -#include "DeviceEnvironment.h" +#include "Environment.h" #include "GlobalHandler.h" #include "PluginInterface.h" #include "Utilities.h" @@ -431,7 +431,7 @@ /// Launch the AMDGPU kernel function.
Error launchImpl(GenericDeviceTy &GenericDevice, uint32_t NumThreads, - uint64_t NumBlocks, + uint64_t NumBlocks, KernelArgsTy &KernelArgs, void *Args, AsyncInfoWrapperTy &AsyncInfoWrapper) const override; diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h @@ -19,7 +19,7 @@ #include #include "Debug.h" -#include "DeviceEnvironment.h" +#include "Environment.h" #include "GlobalHandler.h" #include "JIT.h" #include "MemoryManager.h" diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp @@ -17,7 +17,7 @@ #include #include "Debug.h" -#include "DeviceEnvironment.h" +#include "Environment.h" #include "GlobalHandler.h" #include "PluginInterface.h" diff --git a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp --- a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp @@ -17,7 +17,7 @@ #include #include "Debug.h" -#include "DeviceEnvironment.h" +#include "Environment.h" #include "GlobalHandler.h" #include "PluginInterface.h" #include "omptarget.h" diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -37,7 +37,7 @@ #include "internal.h" #include "rt.h" -#include "DeviceEnvironment.h" +#include "Environment.h" #include "get_elf_mach_gfx_name.h" #include "omptargetplugin.h" #include
"print_tracing.h" diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -23,7 +23,7 @@ #include #include "Debug.h" -#include "DeviceEnvironment.h" +#include "Environment.h" #include "omptarget.h" #include "omptargetplugin.h"