diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -781,7 +781,7 @@ emitGenericVarsEpilog(CGF); CGBuilderTy &Bld = CGF.Builder; - OMPBuilder.createTargetDeinit(Bld, IsSPMD); + OMPBuilder.createTargetDeinit(Bld); } void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D, @@ -819,24 +819,6 @@ IsInTTDRegion = false; } -// Create a unique global variable to indicate the execution mode of this target -// region. The execution mode is either 'generic', or 'spmd' depending on the -// target directive. This variable is picked up by the offload library to setup -// the device appropriately before kernel launch. If the execution mode is -// 'generic', the runtime reserves one warp for the master, otherwise, all -// warps participate in parallel work. -static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name, - bool Mode) { - auto *GVMode = new llvm::GlobalVariable( - CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::WeakAnyLinkage, - llvm::ConstantInt::get(CGM.Int8Ty, Mode ? 
OMP_TGT_EXEC_MODE_SPMD - : OMP_TGT_EXEC_MODE_GENERIC), - Twine(Name, "_exec_mode")); - GVMode->setVisibility(llvm::GlobalVariable::ProtectedVisibility); - CGM.addCompilerUsedGlobal(GVMode); -} - void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, @@ -853,8 +835,6 @@ else emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); - - setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode); } CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1468,8 +1468,7 @@ /// Create a runtime call for kmpc_target_deinit /// /// \param Loc The insert and source location description. - /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not. 
- void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD); + void createTargetDeinit(const LocationDescription &Loc); ///} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -96,6 +96,11 @@ Int64, Int64, Int32Arr3Ty, Int32Arr3Ty, Int32) __OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, false, Int8Ptr) __OMP_STRUCT_TYPE(DependInfo, kmp_dep_info, false, SizeTy, SizeTy, Int8) +__OMP_STRUCT_TYPE(ConfigurationEnvironment, ConfigurationEnvironmentTy, false, + Int8, Int8, Int8) +__OMP_STRUCT_TYPE(DynamicEnvironment, DynamicEnvironmentTy, false, Int16) +__OMP_STRUCT_TYPE(KernelEnvironment, KernelEnvironmentTy, false, + ConfigurationEnvironment, IdentPtr, DynamicEnvironmentPtr) #undef __OMP_STRUCT_TYPE #undef OMP_STRUCT_TYPE @@ -452,8 +457,8 @@ /* Int */ Int32, /* kmp_task_t */ VoidPtr) /// OpenMP Device runtime functions -__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1) -__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8) +__OMP_RTL(__kmpc_target_init, false, Int32, KernelEnvironmentPtr) +__OMP_RTL(__kmpc_target_deinit, false, Void,) __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) __OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32, VoidPtr, VoidPtr, VoidPtrPtr, SizeTy) @@ -1012,9 +1017,9 @@ ReturnPtrAttrs, ParamAttrs(ReadOnlyPtrAttrs, SExt)) __OMP_RTL_ATTRS(__kmpc_target_init, AttributeSet(), SExt, - ParamAttrs(AttributeSet(), SExt, SExt)) + ParamAttrs(AttributeSet())) __OMP_RTL_ATTRS(__kmpc_target_deinit, AttributeSet(), AttributeSet(), - ParamAttrs(AttributeSet(), SExt)) + ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_parallel_51, AlwaysInlineAttrs, AttributeSet(), ParamAttrs(AttributeSet(), SExt, SExt, SExt, SExt, AttributeSet(), AttributeSet(), AttributeSet(), diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" @@ -3895,14 +3896,56 @@ ConstantInt *IsSPMDVal = ConstantInt::getSigned( IntegerType::getInt8Ty(Int8->getContext()), IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); - ConstantInt *UseGenericStateMachine = - ConstantInt::getBool(Int32->getContext(), !IsSPMD); - + ConstantInt *UseGenericStateMachineVal = ConstantInt::getSigned( + IntegerType::getInt8Ty(Int8->getContext()), !IsSPMD); + ConstantInt *MayUseNestedParallelismVal = + ConstantInt::getSigned(IntegerType::getInt8Ty(Int8->getContext()), true); + ConstantInt *DebugIndentionLevelVal = + ConstantInt::getSigned(IntegerType::getInt16Ty(Int8->getContext()), 0); + + Function *Kernel = Builder.GetInsertBlock()->getParent(); + // TODO: This is just a workaround to get the actual kernel, could be the + // caller of the anchor scope if we have a debug wrapper. We need a more + // proper method. 
+ if (Kernel->hasLocalLinkage()) { + assert(Kernel->hasOneUse() && "Unexpected use of debug kernel wrapper."); + auto *CB = cast(Kernel->user_back()); + Kernel = CB->getCaller(); + } Function *Fn = getOrCreateRuntimeFunctionPtr( omp::RuntimeFunction::OMPRTL___kmpc_target_init); - - CallInst *ThreadKind = Builder.CreateCall( - Fn, {Ident, IsSPMDVal, UseGenericStateMachine}); + const DataLayout &DL = Fn->getParent()->getDataLayout(); + + Twine DynamicEnvironmenttName = Kernel->getName() + "_dynamic_environment"; + Constant *DynamicEnvironmentInitializer = + ConstantStruct::get(DynamicEnvironment, {DebugIndentionLevelVal}); + GlobalVariable *DynamicEnvironmentGV = new GlobalVariable( + M, DynamicEnvironment, /* IsConstant */ false, + GlobalValue::InternalLinkage, DynamicEnvironmentInitializer, + DynamicEnvironmenttName, + /* InsertBefore */ nullptr, llvm::GlobalValue::NotThreadLocal, + DL.getDefaultGlobalsAddressSpace()); + + Constant *ConfigurationEnvironmentInitializer = ConstantStruct::get( + ConfigurationEnvironment, { + UseGenericStateMachineVal, + MayUseNestedParallelismVal, + IsSPMDVal, + }); + Constant *KernelEnvironmentInitializer = ConstantStruct::get( + KernelEnvironment, { + ConfigurationEnvironmentInitializer, + Ident, + DynamicEnvironmentGV, + }); + Twine KernelEnvironmentName = Kernel->getName() + "_kernel_environment"; + GlobalVariable *KernelEnvironmentGV = new GlobalVariable( + M, KernelEnvironment, /* IsConstant */ true, GlobalValue::ExternalLinkage, + KernelEnvironmentInitializer, KernelEnvironmentName, + /* InsertBefore */ nullptr, llvm::GlobalValue::NotThreadLocal, + DL.getDefaultGlobalsAddressSpace()); + + CallInst *ThreadKind = Builder.CreateCall(Fn, {KernelEnvironmentGV}); Value *ExecUserCode = Builder.CreateICmpEQ( ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), @@ -3935,22 +3978,14 @@ return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt()); } -void OpenMPIRBuilder::createTargetDeinit(const LocationDescription 
&Loc, - bool IsSPMD) { +void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc) { if (!updateToLocation(Loc)) return; - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); - ConstantInt *IsSPMDVal = ConstantInt::getSigned( - IntegerType::getInt8Ty(Int8->getContext()), - IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); - Function *Fn = getOrCreateRuntimeFunctionPtr( omp::RuntimeFunction::OMPRTL___kmpc_target_deinit); - Builder.CreateCall(Fn, {Ident, IsSPMDVal}); + Builder.CreateCall(Fn, {}); } void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes( diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" +#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/Assumptions.h" #include "llvm/IR/BasicBlock.h" @@ -44,6 +45,7 @@ #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -177,6 +179,80 @@ static constexpr auto TAG = "[" DEBUG_TYPE "]"; #endif +namespace KernelInfo { + +// struct ConfigurationEnvironmentTy { +// uint8_t UseGenericStateMachine; +// uint8_t MayUseNestedParallelism; +// llvm::omp::OMPTgtExecModeFlags ExecMode; +// }; + +// struct DynamicEnvironmentTy { +// uint16_t DebugIndentionLevel; +// }; + +// struct KernelEnvironmentTy { +// ConfigurationEnvironmentTy Configuration; +// IdentTy *Ident; +// DynamicEnvironmentTy *DynamicEnv; +// }; + +#define KERNEL_ENVIRONMENT_IDX(MEMBER, IDX) \ + 
constexpr const unsigned MEMBER##Idx = IDX; + +KERNEL_ENVIRONMENT_IDX(Configuration, 0) +KERNEL_ENVIRONMENT_IDX(Ident, 1) + +#undef KERNEL_ENVIRONMENT_IDX + +#define KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MEMBER, IDX) \ + constexpr const unsigned MEMBER##Idx = IDX; + +KERNEL_ENVIRONMENT_CONFIGURATION_IDX(UseGenericStateMachine, 0) +KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MayUseNestedParallelism, 1) +KERNEL_ENVIRONMENT_CONFIGURATION_IDX(ExecMode, 2) + +#undef KERNEL_ENVIRONMENT_CONFIGURATION_IDX + +#define KERNEL_ENVIRONMENT_GETTER(MEMBER, RETURNTYPE) \ + RETURNTYPE *get##MEMBER##FromKernelEnvironment(ConstantStruct *KernelEnvC) { \ + return cast(KernelEnvC->getAggregateElement(MEMBER##Idx)); \ + } + +KERNEL_ENVIRONMENT_GETTER(Ident, Constant) +KERNEL_ENVIRONMENT_GETTER(Configuration, ConstantStruct) + +#undef KERNEL_ENVIRONMENT_GETTER + +#define KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MEMBER) \ + ConstantInt *get##MEMBER##FromKernelEnvironment( \ + ConstantStruct *KernelEnvC) { \ + ConstantStruct *ConfigC = \ + getConfigurationFromKernelEnvironment(KernelEnvC); \ + return dyn_cast(ConfigC->getAggregateElement(MEMBER##Idx)); \ + } + +KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(UseGenericStateMachine) +KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MayUseNestedParallelism) +KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(ExecMode) + +#undef KERNEL_ENVIRONMENT_CONFIGURATION_GETTER + +GlobalVariable * +getKernelEnvironementGVFromKernelInitCB(CallBase *KernelInitCB) { + constexpr const int InitKernelEnvironmentArgNo = 0; + return cast( + KernelInitCB->getArgOperand(InitKernelEnvironmentArgNo) + ->stripPointerCasts()); +} + +ConstantStruct *getKernelEnvironementFromKernelInitCB(CallBase *KernelInitCB) { + GlobalVariable *KernelEnvGV = + getKernelEnvironementGVFromKernelInitCB(KernelInitCB); + return cast(KernelEnvGV->getInitializer()); +} +} // namespace KernelInfo + namespace { struct AAHeapToShared; @@ -609,6 +685,10 @@ /// one we abort as the kernel is malformed. 
CallBase *KernelInitCB = nullptr; + /// The constant kernel environment as taken from and passed to + /// __kmpc_target_init. + ConstantStruct *KernelEnvC = nullptr; + /// The __kmpc_target_deinit call in this kernel, if any. If we find more than /// one we abort as the kernel is malformed. CallBase *KernelDeinitCB = nullptr; @@ -713,6 +793,12 @@ "assumptions."); KernelDeinitCB = KIS.KernelDeinitCB; } + if (KIS.KernelEnvC) { + if (KernelEnvC && KernelEnvC != KIS.KernelEnvC) + llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt " + "assumptions."); + KernelEnvC = KIS.KernelEnvC; + } SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker; ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions; ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions; @@ -2790,9 +2876,11 @@ CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr; if (!CB) return false; - const int InitModeArgNo = 1; - auto *ModeCI = dyn_cast(CB->getOperand(InitModeArgNo)); - return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC); + ConstantStruct *KernelEnvC = + KernelInfo::getKernelEnvironementFromKernelInitCB(CB); + ConstantInt *ExecModeC = + KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC); + return ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC; } if (C->isZero()) { @@ -3434,6 +3522,29 @@ return GuardedInstructions; } + void setConfigurationOfKernelEnvironment(ConstantStruct *ConfigC) { + Constant *NewKernelEnvC = ConstantFoldInsertValueInstruction( + KernelEnvC, ConfigC, {KernelInfo::ConfigurationIdx}); + assert(NewKernelEnvC && "Failed to create new kernel environment"); + KernelEnvC = cast(NewKernelEnvC); + } + +#define KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MEMBER) \ + void set##MEMBER##OfKernelEnvironment(ConstantInt *NewVal) { \ + ConstantStruct *ConfigC = \ + KernelInfo::getConfigurationFromKernelEnvironment(KernelEnvC); \ + Constant *NewConfigC = ConstantFoldInsertValueInstruction( \ + ConfigC, NewVal, 
{KernelInfo::MEMBER##Idx}); \ + assert(NewConfigC && "Failed to create new configuration environment"); \ + setConfigurationOfKernelEnvironment(cast(NewConfigC)); \ + } + + KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(UseGenericStateMachine) + KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MayUseNestedParallelism) + KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(ExecMode) + +#undef KERNEL_ENVIRONMENT_CONFIGURATION_SETTER + /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { // This is a high-level transform that might change the constant arguments @@ -3482,76 +3593,39 @@ ReachingKernelEntries.insert(Fn); IsKernelEntry = true; - // For kernels we might need to initialize/finalize the IsSPMD state and - // we need to register a simplification callback so that the Attributor - // knows the constant arguments to __kmpc_target_init and - // __kmpc_target_deinit might actually change. + KernelEnvC = + KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB); - Attributor::SimplifictionCallbackTy StateMachineSimplifyCB = + Attributor::SimplifictionCallbackTy KernelConfigurationSimplifyCB = [&](const IRPosition &IRP, const AbstractAttribute *AA, bool &UsedAssumedInformation) -> std::optional { - // IRP represents the "use generic state machine" argument of an - // __kmpc_target_init call. We will answer this one with the internal - // state. As long as we are not in an invalid state, we will create a - // custom state machine so the value should be a `i1 false`. If we are - // in an invalid state, we won't change the value that is in the IR. - if (!ReachedKnownParallelRegions.isValidState()) - return nullptr; - // If we have disabled state machine rewrites, don't make a custom one. 
- if (DisableOpenMPOptStateMachineRewrite) - return nullptr; - if (AA) - A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); - UsedAssumedInformation = !isAtFixpoint(); - auto *FalseVal = - ConstantInt::getBool(IRP.getAnchorValue().getContext(), false); - return FalseVal; + return KernelEnvC; }; - Attributor::SimplifictionCallbackTy ModeSimplifyCB = - [&](const IRPosition &IRP, const AbstractAttribute *AA, - bool &UsedAssumedInformation) -> std::optional { - // IRP represents the "SPMDCompatibilityTracker" argument of an - // __kmpc_target_init or - // __kmpc_target_deinit call. We will answer this one with the internal - // state. - if (!SPMDCompatibilityTracker.isValidState()) - return nullptr; - if (!SPMDCompatibilityTracker.isAtFixpoint()) { - if (AA) - A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); - UsedAssumedInformation = true; - } else { - UsedAssumedInformation = false; - } - auto *Val = ConstantInt::getSigned( - IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()), - SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD - : OMP_TGT_EXEC_MODE_GENERIC); - return Val; - }; - - constexpr const int InitModeArgNo = 1; - constexpr const int DeinitModeArgNo = 1; - constexpr const int InitUseStateMachineArgNo = 2; - A.registerSimplificationCallback( - IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo), - StateMachineSimplifyCB); - A.registerSimplificationCallback( - IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo), - ModeSimplifyCB); - A.registerSimplificationCallback( - IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo), - ModeSimplifyCB); + A.registerSimplificationCallback(IRPosition::value(*KernelEnvC), + KernelConfigurationSimplifyCB); // Check if we know we are in SPMD-mode already. 
- ConstantInt *ModeArg = - dyn_cast(KernelInitCB->getArgOperand(InitModeArgNo)); - if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) + ConstantInt *ExecModeC = + KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC); + ConstantInt *AssumedExecModeC = ConstantInt::get( + ExecModeC->getType(), + ExecModeC->getSExtValue() | OMP_TGT_EXEC_MODE_GENERIC_SPMD); + if (ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD) SPMDCompatibilityTracker.indicateOptimisticFixpoint(); - // This is a generic region but SPMDization is disabled so stop tracking. else if (DisableOpenMPOptSPMDization) + // This is a generic region but SPMDization is disabled so stop + // tracking. SPMDCompatibilityTracker.indicatePessimisticFixpoint(); + else + setExecModeOfKernelEnvironment(AssumedExecModeC); + + ConstantInt *MayUseNestedParallelismC = + KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(KernelEnvC); + ConstantInt *AssumedMayUseNestedParallelismC = ConstantInt::get( + MayUseNestedParallelismC->getType(), NestedParallelism); + setMayUseNestedParallelismOfKernelEnvironment( + AssumedMayUseNestedParallelismC); // Register virtual uses of functions we might need to preserve. auto RegisterVirtualUse = [&](RuntimeFunction RFKind, @@ -3652,21 +3726,21 @@ if (!KernelInitCB || !KernelDeinitCB) return ChangeStatus::UNCHANGED; - /// Insert nested Parallelism global variable - Function *Kernel = getAnchorScope(); - Module &M = *Kernel->getParent(); - Type *Int8Ty = Type::getInt8Ty(M.getContext()); - new GlobalVariable(M, Int8Ty, /* isConstant */ true, - GlobalValue::WeakAnyLinkage, - ConstantInt::get(Int8Ty, NestedParallelism ? 1 : 0), - Kernel->getName() + "_nested_parallelism"); + ChangeStatus Changed = ChangeStatus::UNCHANGED; // If we can we change the execution mode to SPMD-mode otherwise we build a // custom state machine. 
- ChangeStatus Changed = ChangeStatus::UNCHANGED; if (!changeToSPMDMode(A, Changed)) { if (!KernelInitCB->getCalledFunction()->isDeclaration()) - return buildCustomStateMachine(A); + Changed |= buildCustomStateMachine(A); + } + + // At last, update the KernelEnvC + GlobalVariable *KernelEnvGV = + KernelInfo::getKernelEnvironementGVFromKernelInitCB(KernelInitCB); + if (KernelEnvGV->getInitializer() != KernelEnvC) { + KernelEnvGV->setInitializer(KernelEnvC); + Changed = ChangeStatus::CHANGED; } return Changed; @@ -3736,14 +3810,14 @@ // Find escaping outputs from the guarded region to outside users and // broadcast their values to them. for (Instruction &I : *RegionStartBB) { - SmallPtrSet OutsideUsers; - for (User *Usr : I.users()) { - Instruction &UsrI = *cast(Usr); + SmallVector OutsideUses; + for (Use &U : I.uses()) { + Instruction &UsrI = *cast(U.getUser()); if (UsrI.getParent() != RegionStartBB) - OutsideUsers.insert(&UsrI); + OutsideUses.push_back(&U); } - if (OutsideUsers.empty()) + if (OutsideUses.empty()) continue; HasBroadcastValues = true; @@ -3766,8 +3840,8 @@ RegionBarrierBB->getTerminator()); // Emit a load instruction and replace uses of the output value. - for (Instruction *UsrI : OutsideUsers) - UsrI->replaceUsesOfWith(&I, LoadI); + for (Use *U : OutsideUses) + A.changeUseAfterManifest(*U, *LoadI); } auto &OMPInfoCache = static_cast(A.getInfoCache()); @@ -3994,16 +4068,11 @@ assert(OMPInfoCache.Kernels.count(Kernel) && "Expected kernel function!"); // Check if the kernel is already in SPMD mode, if so, return success. - GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable( - (Kernel->getName() + "_exec_mode").str()); - assert(ExecMode && "Kernel without exec mode?"); - assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!"); - - // Set the global exec mode flag to indicate SPMD-Generic mode. 
- assert(isa(ExecMode->getInitializer()) && - "ExecMode is not an integer!"); - const int8_t ExecModeVal = - cast(ExecMode->getInitializer())->getSExtValue(); + ConstantStruct *ExistingKernelEnvC = + KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB); + auto *ExecModeC = + KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC); + const int8_t ExecModeVal = ExecModeC->getSExtValue(); if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC) return true; @@ -4021,27 +4090,8 @@ // kernel is executed in. assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC && "Initially non-SPMD kernel has SPMD exec mode!"); - ExecMode->setInitializer( - ConstantInt::get(ExecMode->getInitializer()->getType(), - ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD)); - - // Next rewrite the init and deinit calls to indicate we use SPMD-mode now. - const int InitModeArgNo = 1; - const int DeinitModeArgNo = 1; - const int InitUseStateMachineArgNo = 2; - - auto &Ctx = getAnchorValue().getContext(); - A.changeUseAfterManifest( - KernelInitCB->getArgOperandUse(InitModeArgNo), - *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx), - OMP_TGT_EXEC_MODE_SPMD)); - A.changeUseAfterManifest( - KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), - *ConstantInt::getBool(Ctx, false)); - A.changeUseAfterManifest( - KernelDeinitCB->getArgOperandUse(DeinitModeArgNo), - *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx), - OMP_TGT_EXEC_MODE_SPMD)); + setExecModeOfKernelEnvironment(ConstantInt::get( + ExecModeC->getType(), ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD)); ++NumOpenMPTargetRegionKernelsSPMD; @@ -4068,30 +4118,29 @@ OMPRTL___kmpc_kernel_parallel, OMPRTL___kmpc_kernel_end_parallel})) return ChangeStatus::UNCHANGED; - const int InitModeArgNo = 1; - const int InitUseStateMachineArgNo = 2; + ConstantStruct *ExistingKernelEnvC = + KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB); // Check if the current configuration is non-SPMD and generic state machine. 
// If we already have SPMD mode or a custom state machine we do not need to // go any further. If it is anything but a constant something is weird and // we give up. - ConstantInt *UseStateMachine = dyn_cast( - KernelInitCB->getArgOperand(InitUseStateMachineArgNo)); - ConstantInt *Mode = - dyn_cast(KernelInitCB->getArgOperand(InitModeArgNo)); + ConstantInt *UseStateMachineC = + KernelInfo::getUseGenericStateMachineFromKernelEnvironment( + ExistingKernelEnvC); + ConstantInt *ModeC = + KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC); // If we are stuck with generic mode, try to create a custom device (=GPU) // state machine which is specialized for the parallel regions that are // reachable by the kernel. - if (!UseStateMachine || UseStateMachine->isZero() || !Mode || - (Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) + if (UseStateMachineC->isZero() || + (ModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) return ChangeStatus::UNCHANGED; // If not SPMD mode, indicate we use a custom state machine now. - auto &Ctx = getAnchorValue().getContext(); - auto *FalseVal = ConstantInt::getBool(Ctx, false); - A.changeUseAfterManifest( - KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *FalseVal); + setUseGenericStateMachineOfKernelEnvironment( + ConstantInt::get(UseStateMachineC->getType(), false)); // If we don't actually need a state machine we are done here. This can // happen if there simply are no parallel regions. In the resulting kernel @@ -4170,6 +4219,7 @@ // UserCodeEntryBB: // user code // __kmpc_target_deinit(...) 
// + auto &Ctx = getAnchorValue().getContext(); Function *Kernel = getAssociatedFunction(); assert(Kernel && "Expected an associated function!"); @@ -4252,7 +4302,7 @@ StateMachineBeginBB->end()), DLoc)); - Value *Ident = KernelInitCB->getArgOperand(0); + Value *Ident = KernelInfo::getIdentFromKernelEnvironment(KernelEnvC); Value *GTid = KernelInitCB; FunctionCallee BarrierFn = @@ -4382,6 +4432,46 @@ ChangeStatus updateImpl(Attributor &A) override { KernelInfoState StateBefore = getState(); + // When we leave this function this RAII will make sure the member + // KernelEnvC is updated properly depending on the state. That member is + // used for simplification of values and needs to be up to date at all + // times. + struct UpdateKernelEnvCRAII { + AAKernelInfoFunction &AA; + + UpdateKernelEnvCRAII(AAKernelInfoFunction &AA) : AA(AA) {} + + ~UpdateKernelEnvCRAII() { + if (!AA.KernelEnvC) + return; + + ConstantStruct *ExistingKernelEnvC = + KernelInfo::getKernelEnvironementFromKernelInitCB(AA.KernelInitCB); + + if (!AA.isValidState()) { + AA.KernelEnvC = ExistingKernelEnvC; + return; + } + + if (!AA.ReachedKnownParallelRegions.isValidState()) + AA.setUseGenericStateMachineOfKernelEnvironment( + KernelInfo::getUseGenericStateMachineFromKernelEnvironment( + ExistingKernelEnvC)); + + if (!AA.SPMDCompatibilityTracker.isValidState()) + AA.setExecModeOfKernelEnvironment( + KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC)); + + ConstantInt *MayUseNestedParallelismC = + KernelInfo::getMayUseNestedParallelismFromKernelEnvironment( + AA.KernelEnvC); + ConstantInt *NewMayUseNestedParallelismC = ConstantInt::get( + MayUseNestedParallelismC->getType(), AA.NestedParallelism); + AA.setMayUseNestedParallelismOfKernelEnvironment( + NewMayUseNestedParallelismC); + } + } RAII(*this); + // Callback to check a read/write instruction. auto CheckRWInst = [&](Instruction &I) { // We handle calls later. 
diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -128,6 +128,7 @@ -nocudalib -nogpulib -nostdinc -fopenmp -fopenmp-cuda-mode -Wno-unknown-cuda-version + -DOMPTARGET_DEVICE_RUNTIME -I${include_directory} -I${devicertl_base_directory}/../include ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL} diff --git a/openmp/libomptarget/DeviceRTL/include/Debug.h b/openmp/libomptarget/DeviceRTL/include/Debug.h --- a/openmp/libomptarget/DeviceRTL/include/Debug.h +++ b/openmp/libomptarget/DeviceRTL/include/Debug.h @@ -50,8 +50,6 @@ struct DebugEntryRAII { DebugEntryRAII(const char *File, const unsigned Line, const char *Function); ~DebugEntryRAII(); - - static void init(); }; #endif diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h --- a/openmp/libomptarget/DeviceRTL/include/Interface.h +++ b/openmp/libomptarget/DeviceRTL/include/Interface.h @@ -214,12 +214,14 @@ /// Kernel /// ///{ +// Forward declaration +struct KernelEnvironmentTy; + int8_t __kmpc_is_spmd_exec_mode(); -int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode, - bool UseGenericStateMachine); +int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment); -void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode); +void __kmpc_target_deinit(); ///} diff --git a/openmp/libomptarget/DeviceRTL/include/State.h b/openmp/libomptarget/DeviceRTL/include/State.h --- a/openmp/libomptarget/DeviceRTL/include/State.h +++ b/openmp/libomptarget/DeviceRTL/include/State.h @@ -17,6 +17,9 @@ #include "Types.h" #include "Utils.h" +// Forward declaration. +struct KernelEnvironmentTy; + #pragma omp begin declare target device_type(nohost) namespace ompx { @@ -113,7 +116,10 @@ #pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc) /// Initialize the state machinery. Must be called by all threads. 
-void init(bool IsSPMD); +void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment); + +/// Return the kernel environment associated with the current kernel. +KernelEnvironmentTy &getKernelEnvironment(); /// TODO enum ValueKind { diff --git a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp --- a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "Configuration.h" -#include "DeviceEnvironment.h" +#include "Environment.h" #include "State.h" #include "Types.h" @@ -53,7 +53,9 @@ bool config::mayUseThreadStates() { return !__omp_rtl_assume_no_thread_state; } bool config::mayUseNestedParallelism() { - return !__omp_rtl_assume_no_nested_parallelism; + if (__omp_rtl_assume_no_nested_parallelism) + return false; + return state::getKernelEnvironment().Configuration.MayUseNestedParallelism; } #pragma omp end declare target diff --git a/openmp/libomptarget/DeviceRTL/src/Debug.cpp b/openmp/libomptarget/DeviceRTL/src/Debug.cpp --- a/openmp/libomptarget/DeviceRTL/src/Debug.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Debug.cpp @@ -12,8 +12,10 @@ #include "Debug.h" #include "Configuration.h" +#include "Environment.h" #include "Interface.h" #include "Mapping.h" +#include "State.h" #include "Types.h" using namespace ompx; @@ -31,15 +33,14 @@ } } -/// Current indentation level for the function trace. Only accessed by thread 0. 
-__attribute__((loader_uninitialized)) static uint32_t Level; -#pragma omp allocate(Level) allocator(omp_pteam_mem_alloc) - DebugEntryRAII::DebugEntryRAII(const char *File, const unsigned Line, const char *Function) { if (config::isDebugMode(config::DebugKind::FunctionTracing) && mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) { + uint16_t &Level = + state::getKernelEnvironment().DynamicEnv->DebugIndentionLevel; + for (int I = 0; I < Level; ++I) PRINTF("%s", " "); @@ -51,10 +52,11 @@ DebugEntryRAII::~DebugEntryRAII() { if (config::isDebugMode(config::DebugKind::FunctionTracing) && - mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) + mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) { + uint16_t &Level = + state::getKernelEnvironment().DynamicEnv->DebugIndentionLevel; Level--; + } } -void DebugEntryRAII::init() { Level = 0; } - #pragma omp end declare target diff --git a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp --- a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "Debug.h" +#include "Environment.h" #include "Interface.h" #include "Mapping.h" #include "State.h" @@ -23,11 +24,12 @@ #pragma omp begin declare target device_type(nohost) -static void inititializeRuntime(bool IsSPMD) { +static void inititializeRuntime(bool IsSPMD, + KernelEnvironmentTy &KernelEnvironment) { // Order is important here. synchronize::init(IsSPMD); mapping::init(IsSPMD); - state::init(IsSPMD); + state::init(IsSPMD, KernelEnvironment); } /// Simple generic state machine for worker threads. @@ -67,16 +69,17 @@ /// /// \param Ident Source location identification, can be NULL. 
/// -int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode, - bool UseGenericStateMachine) { +int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment) { FunctionTracingRAII(); - const bool IsSPMD = - Mode & llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD; + ConfigurationEnvironmentTy &Configuration = KernelEnvironment.Configuration; + bool IsSPMD = Configuration.ExecMode & + llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD; + bool UseGenericStateMachine = Configuration.UseGenericStateMachine; if (IsSPMD) { - inititializeRuntime(/* IsSPMD */ true); + inititializeRuntime(/* IsSPMD */ true, KernelEnvironment); synchronize::threadsAligned(); } else { - inititializeRuntime(/* IsSPMD */ false); + inititializeRuntime(/* IsSPMD */ false, KernelEnvironment); // No need to wait since only the main threads will execute user // code and workers will run into a barrier right away. } @@ -104,7 +107,7 @@ // thread's warp, so none of its threads can ever be active worker threads. if (UseGenericStateMachine && mapping::getThreadIdInBlock() < mapping::getBlockSize(IsSPMD)) { - genericStateMachine(Ident); + genericStateMachine(KernelEnvironment.Ident); } else { // Retrieve the work function just to ensure we always call // __kmpc_kernel_parallel even if a custom state machine is used. @@ -128,10 +131,9 @@ /// /// \param Ident Source location identification, can be NULL. 
/// -void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode) { +void __kmpc_target_deinit() { FunctionTracingRAII(); - const bool IsSPMD = - Mode & llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD; + bool IsSPMD = mapping::isSPMDMode(); state::assumeInitialState(IsSPMD); if (IsSPMD) return; diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -9,8 +9,8 @@ //===----------------------------------------------------------------------===// #include "State.h" -#include "Configuration.h" #include "Debug.h" +#include "Environment.h" #include "Interface.h" #include "Mapping.h" #include "Synchronization.h" @@ -34,6 +34,9 @@ extern unsigned char DynamicSharedBuffer[] __attribute__((aligned(Alignment))); #pragma omp allocate(DynamicSharedBuffer) allocator(omp_pteam_mem_alloc) +/// The kernel environment passed to the init method by the compiler. +static KernelEnvironmentTy *SHARED(KernelEnvironmentPtr); + namespace { /// Fallback implementations are missing to trigger a link time error. 
@@ -241,15 +244,19 @@ } // namespace -void state::init(bool IsSPMD) { +void state::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment) { SharedMemorySmartStack.init(IsSPMD); if (mapping::isInitialThreadInLevel0(IsSPMD)) { TeamState.init(IsSPMD); - DebugEntryRAII::init(); ThreadStates = nullptr; + KernelEnvironmentPtr = &KernelEnvironment; } } +KernelEnvironmentTy &state::getKernelEnvironment() { + return *KernelEnvironmentPtr; +} + void state::enterDataEnvironment(IdentTy *Ident) { ASSERT(config::mayUseThreadStates() && "Thread state modified while explicitly disabled!"); diff --git a/openmp/libomptarget/include/DeviceEnvironment.h b/openmp/libomptarget/include/DeviceEnvironment.h deleted file mode 100644 --- a/openmp/libomptarget/include/DeviceEnvironment.h +++ /dev/null @@ -1,25 +0,0 @@ -//===---- device_environment.h - OpenMP GPU device environment ---- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Global device environment -// -//===----------------------------------------------------------------------===// - -#ifndef _OMPTARGET_DEVICE_ENVIRONMENT_H_ -#define _OMPTARGET_DEVICE_ENVIRONMENT_H_ - -// deviceRTL uses and DeviceRTL uses explicit definitions - -struct DeviceEnvironmentTy { - uint32_t DebugKind; - uint32_t NumDevices; - uint32_t DeviceNum; - uint32_t DynamicMemSize; -}; - -#endif diff --git a/openmp/libomptarget/include/Environment.h b/openmp/libomptarget/include/Environment.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/include/Environment.h @@ -0,0 +1,61 @@ +//===------------ Environment.h - OpenMP GPU environments --------- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Environments shared between host and device. +// +//===----------------------------------------------------------------------===// + +#ifndef _OMPTARGET_ENVIRONMENT_H_ +#define _OMPTARGET_ENVIRONMENT_H_ + +#ifdef OMPTARGET_DEVICE_RUNTIME +#include "Types.h" +#else +#include "SourceInfo.h" + +#include + +using IdentTy = ident_t; +#endif + +#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h" + +struct DeviceEnvironmentTy { + uint32_t DebugKind; + uint32_t NumDevices; + uint32_t DeviceNum; + uint32_t DynamicMemSize; +}; + +// NOTE: Please don't change the order of those members as their indices are +// used in the middle end. Always add the new data member at the end. +// Different from KernelEnvironmentTy below, this structure contains members +// that might be modified at runtime. +struct DynamicEnvironmentTy { + /// Current indentation level for the function trace. Only accessed by thread + /// 0. + uint16_t DebugIndentionLevel; +}; + +// NOTE: Please don't change the order of those members as their indices are +// used in the middle end. Always add the new data member at the end. +struct ConfigurationEnvironmentTy { + uint8_t UseGenericStateMachine; + uint8_t MayUseNestedParallelism; + llvm::omp::OMPTgtExecModeFlags ExecMode; +}; + +// NOTE: Please don't change the order of those members as their indices are +// used in the middle end. Always add the new data member at the end. 
+struct KernelEnvironmentTy { + ConfigurationEnvironmentTy Configuration; + IdentTy *Ident; + DynamicEnvironmentTy *DynamicEnv; +}; + +#endif // _OMPTARGET_ENVIRONMENT_H_ diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -21,7 +21,7 @@ #include #include "Debug.h" -#include "DeviceEnvironment.h" +#include "Environment.h" #include "GlobalHandler.h" #include "PluginInterface.h" #include "Utilities.h" diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h @@ -19,7 +19,7 @@ #include #include "Debug.h" -#include "DeviceEnvironment.h" +#include "Environment.h" #include "GlobalHandler.h" #include "JIT.h" #include "MemoryManager.h" @@ -732,6 +732,11 @@ /// Map of host pinned allocations used for optimize device transfers. PinnedAllocationMapTy PinnedAllocs; + +private: + /// Return the kernel environment object for kernel \p Name. + Expected + getKernelEnvironmentForKernel(StringRef Name, DeviceImageTy &Image); }; /// Class implementing common functionalities of offload plugins. 
Each plugin diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp @@ -554,32 +554,43 @@ return Plugin::success(); } -Expected -GenericDeviceTy::getExecutionModeForKernel(StringRef Name, - DeviceImageTy &Image) { - // Create a metadata object for the exec mode global (auto-generated). - StaticGlobalTy ExecModeGlobal(Name.data(), - "_exec_mode"); +Expected +GenericDeviceTy::getKernelEnvironmentForKernel(StringRef Name, + DeviceImageTy &Image) { + // Create a metadata object for the kernel environment object. + StaticGlobalTy KernelEnv(Name.data(), "_kernel_info"); - // Retrieve execution mode for the kernel. This may fail since some kernels - // may not have an execution mode. + // Retrieve kernel environment object for the kernel. GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler(); - if (auto Err = GHandler.readGlobalFromImage(*this, Image, ExecModeGlobal)) { + if (auto Err = GHandler.readGlobalFromImage(*this, Image, KernelEnv)) { // Consume the error since it is acceptable to fail. [[maybe_unused]] std::string ErrStr = toString(std::move(Err)); - DP("Failed to read execution mode for '%s': %s\n" - "Using default SPMD (2) execution mode\n", - Name.data(), ErrStr.data()); + DP("Failed to read kernel environment object for '%s': %s\n", Name.data(), + ErrStr.data()); - return OMP_TGT_EXEC_MODE_SPMD; + return createStringError(inconvertibleErrorCode(), ErrStr); } + return KernelEnv.getValue(); +} + +Expected +GenericDeviceTy::getExecutionModeForKernel(StringRef Name, + DeviceImageTy &Image) { + auto KernelEnvOrError = getKernelEnvironmentForKernel(Name, Image); + // We error out directly if we can't read the kernel environment object. 
+ if (!KernelEnvOrError) + return KernelEnvOrError.takeError(); + + auto &KernelEnv = *KernelEnvOrError; + auto ExecMode = KernelEnv.Configuration.ExecMode; + // Check that the retrieved execution mode is valid. - if (!GenericKernelTy::isValidExecutionMode(ExecModeGlobal.getValue())) - return Plugin::error("Invalid execution mode %d for '%s'", - ExecModeGlobal.getValue(), Name.data()); + if (!GenericKernelTy::isValidExecutionMode(ExecMode)) + return Plugin::error("Invalid execution mode %d for '%s'", ExecMode, + Name.data()); - return ExecModeGlobal.getValue(); + return ExecMode; } Error PinnedAllocationMapTy::insertEntry(void *HstPtr, void *DevAccessiblePtr, diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp @@ -17,7 +17,7 @@ #include #include "Debug.h" -#include "DeviceEnvironment.h" +#include "Environment.h" #include "GlobalHandler.h" #include "PluginInterface.h" diff --git a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp --- a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp @@ -17,7 +17,7 @@ #include #include "Debug.h" -#include "DeviceEnvironment.h" +#include "Environment.h" #include "GlobalHandler.h" #include "PluginInterface.h" #include "omptarget.h" diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -37,7 +37,7 @@ #include "internal.h" #include "rt.h" -#include "DeviceEnvironment.h" +#include "Environment.h" #include "get_elf_mach_gfx_name.h" #include "omptargetplugin.h" #include "print_tracing.h" diff --git 
a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -23,7 +23,7 @@ #include #include "Debug.h" -#include "DeviceEnvironment.h" +#include "Environment.h" #include "omptarget.h" #include "omptargetplugin.h"