diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -480,27 +480,6 @@ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) }; -namespace { -LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); -/// Values for bit flags for marking which requires clauses have been used. -enum OpenMPOffloadingRequiresDirFlags : int64_t { - /// flag undefined. - OMP_REQ_UNDEFINED = 0x000, - /// no requires clause present. - OMP_REQ_NONE = 0x001, - /// reverse_offload clause. - OMP_REQ_REVERSE_OFFLOAD = 0x002, - /// unified_address clause. - OMP_REQ_UNIFIED_ADDRESS = 0x004, - /// unified_shared_memory clause. - OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, - /// dynamic_allocators clause. - OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) -}; - -} // anonymous namespace - /// Describes ident structure that describes a source location. /// All descriptions are taken from /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h @@ -1055,9 +1034,11 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) : CGM(CGM), OMPBuilder(CGM.getModule()) { KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); - llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsTargetDevice, - isGPU(), hasRequiresUnifiedSharedMemory(), - CGM.getLangOpts().OpenMPOffloadMandatory); + llvm::OpenMPIRBuilderConfig Config( + CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(), + CGM.getLangOpts().OpenMPOffloadMandatory, + /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false, + hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false); OMPBuilder.initialize(CGM.getLangOpts().OpenMPIsTargetDevice ? CGM.getLangOpts().OMPHostIRFile : StringRef{}); @@ -10162,7 +10143,6 @@ std::string ReqName = getName({"omp_offloading", "requires_reg"}); RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); - OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; // TODO: check for other requires clauses. // The requires directive takes effect only when a target region is // present in the compilation unit. Otherwise it is ignored and not @@ -10172,11 +10152,10 @@ assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion || !OMPBuilder.OffloadInfoManager.empty()) && "Target or declare target region expected."); - if (HasRequiresUnifiedSharedMemory) - Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___tgt_register_requires), - llvm::ConstantInt::get(CGM.Int64Ty, Flags)); + llvm::ConstantInt::get( + CGM.Int64Ty, OMPBuilder.Config.getRequiresFlags())); CGF.FinishFunction(); } return RequiresRegFn; diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -853,9 +853,11 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) : CGOpenMPRuntime(CGM) { - llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsTargetDevice, - isGPU(), hasRequiresUnifiedSharedMemory(), - CGM.getLangOpts().OpenMPOffloadMandatory); + llvm::OpenMPIRBuilderConfig Config( + CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(), + CGM.getLangOpts().OpenMPOffloadMandatory, + /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false, + hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false); OMPBuilder.setConfig(Config); if (!CGM.getLangOpts().OpenMPIsTargetDevice) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -89,10 +89,6 @@ /// Flag for specifying if the compilation is done for an accelerator. std::optional IsGPU; - /// Flag for specifying weather a requires unified_shared_memory - /// directive is present or not. - std::optional HasRequiresUnifiedSharedMemory; - // Flag for specifying if offloading is mandatory. std::optional OpenMPOffloadMandatory; @@ -101,13 +97,13 @@ /// Separator used between all of the rest consecutive parts of s name std::optional Separator; - OpenMPIRBuilderConfig() {} + OpenMPIRBuilderConfig(); OpenMPIRBuilderConfig(bool IsTargetDevice, bool IsGPU, + bool OpenMPOffloadMandatory, + bool HasRequiresReverseOffload, + bool HasRequiresUnifiedAddress, bool HasRequiresUnifiedSharedMemory, - bool OpenMPOffloadMandatory) - : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU), - HasRequiresUnifiedSharedMemory(HasRequiresUnifiedSharedMemory), - OpenMPOffloadMandatory(OpenMPOffloadMandatory) {} + bool HasRequiresDynamicAllocators); // Getters functions that assert if the required values are not present. bool isTargetDevice() const { @@ -120,17 +116,22 @@ return *IsGPU; } - bool hasRequiresUnifiedSharedMemory() const { - assert(HasRequiresUnifiedSharedMemory.has_value() && - "HasUnifiedSharedMemory is not set"); - return *HasRequiresUnifiedSharedMemory; - } - bool openMPOffloadMandatory() const { assert(OpenMPOffloadMandatory.has_value() && "OpenMPOffloadMandatory is not set"); return *OpenMPOffloadMandatory; } + + bool hasRequiresFlags() const { return RequiresFlags; } + bool hasRequiresReverseOffload() const; + bool hasRequiresUnifiedAddress() const; + bool hasRequiresUnifiedSharedMemory() const; + bool hasRequiresDynamicAllocators() const; + + /// Returns requires directive clauses as flags compatible with those expected + /// by libomptarget. + int64_t getRequiresFlags() const; + // Returns the FirstSeparator if set, otherwise use the default separator // depending on isGPU StringRef firstSeparator() const { @@ -153,11 +154,17 @@ void setIsTargetDevice(bool Value) { IsTargetDevice = Value; } void setIsGPU(bool Value) { IsGPU = Value; } - void setHasRequiresUnifiedSharedMemory(bool Value) { - HasRequiresUnifiedSharedMemory = Value; - } void setFirstSeparator(StringRef FS) { FirstSeparator = FS; } void setSeparator(StringRef S) { Separator = S; } + + void setHasRequiresReverseOffload(bool Value); + void setHasRequiresUnifiedAddress(bool Value); + void setHasRequiresUnifiedSharedMemory(bool Value); + void setHasRequiresDynamicAllocators(bool Value); + +private: + /// Flags for specifying which requires directive clauses are present. + int64_t RequiresFlags; }; /// Data structure to contain the information needed to uniquely identify @@ -2520,6 +2527,16 @@ /// \param Name Name of the variable. GlobalVariable *getOrCreateInternalVariable(Type *Ty, const StringRef &Name, unsigned AddressSpace = 0); + + /// Create a global function to register OpenMP requires flags into the + /// runtime, according to the `Config`. + /// + /// This function should be added to the list of constructors of the + /// compilation unit in order to be called before other OpenMP runtime + /// functions. + /// + /// \param Name Name of the created function. + Function *createRegisterRequires(StringRef Name); }; /// Class to represented the control flow structure of an OpenMP canonical loop. diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -25,12 +25,14 @@ #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Frontend/OpenMP/OMPGridValues.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" @@ -338,6 +340,104 @@ return splitBB(Builder, CreateBranch, Old->getName() + Suffix); } +//===----------------------------------------------------------------------===// +// OpenMPIRBuilderConfig +//===----------------------------------------------------------------------===// + +namespace { +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); +/// Values for bit flags for marking which requires clauses have been used. +enum OpenMPOffloadingRequiresDirFlags { + /// flag undefined. + OMP_REQ_UNDEFINED = 0x000, + /// no requires directive present. + OMP_REQ_NONE = 0x001, + /// reverse_offload clause. + OMP_REQ_REVERSE_OFFLOAD = 0x002, + /// unified_address clause. + OMP_REQ_UNIFIED_ADDRESS = 0x004, + /// unified_shared_memory clause. + OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, + /// dynamic_allocators clause. + OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) +}; + +} // anonymous namespace + +OpenMPIRBuilderConfig::OpenMPIRBuilderConfig() + : RequiresFlags(OMP_REQ_UNDEFINED) {} + +OpenMPIRBuilderConfig::OpenMPIRBuilderConfig( + bool IsTargetDevice, bool IsGPU, bool OpenMPOffloadMandatory, + bool HasRequiresReverseOffload, bool HasRequiresUnifiedAddress, + bool HasRequiresUnifiedSharedMemory, bool HasRequiresDynamicAllocators) + : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU), + OpenMPOffloadMandatory(OpenMPOffloadMandatory), + RequiresFlags(OMP_REQ_UNDEFINED) { + if (HasRequiresReverseOffload) + RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD; + if (HasRequiresUnifiedAddress) + RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS; + if (HasRequiresUnifiedSharedMemory) + RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY; + if (HasRequiresDynamicAllocators) + RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS; +} + +bool OpenMPIRBuilderConfig::hasRequiresReverseOffload() const { + return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD; +} + +bool OpenMPIRBuilderConfig::hasRequiresUnifiedAddress() const { + return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS; +} + +bool OpenMPIRBuilderConfig::hasRequiresUnifiedSharedMemory() const { + return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY; +} + +bool OpenMPIRBuilderConfig::hasRequiresDynamicAllocators() const { + return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS; +} + +int64_t OpenMPIRBuilderConfig::getRequiresFlags() const { + return hasRequiresFlags() ? RequiresFlags + : static_cast(OMP_REQ_NONE); +} + +void OpenMPIRBuilderConfig::setHasRequiresReverseOffload(bool Value) { + if (Value) + RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD; + else + RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD; +} + +void OpenMPIRBuilderConfig::setHasRequiresUnifiedAddress(bool Value) { + if (Value) + RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS; + else + RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS; +} + +void OpenMPIRBuilderConfig::setHasRequiresUnifiedSharedMemory(bool Value) { + if (Value) + RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY; + else + RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY; +} + +void OpenMPIRBuilderConfig::setHasRequiresDynamicAllocators(bool Value) { + if (Value) + RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS; + else + RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS; +} + +//===----------------------------------------------------------------------===// +// OpenMPIRBuilder +//===----------------------------------------------------------------------===// + void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector &ArgsVector) { @@ -6106,6 +6206,39 @@ } } +Function *OpenMPIRBuilder::createRegisterRequires(StringRef Name) { + // Skip the creation of the registration function if this is device codegen + if (Config.isTargetDevice()) + return nullptr; + + Builder.ClearInsertionPoint(); + + // Create registration function prototype + auto *RegFnTy = FunctionType::get(Builder.getVoidTy(), {}); + auto *RegFn = Function::Create( + RegFnTy, GlobalVariable::LinkageTypes::InternalLinkage, Name, M); + RegFn->setSection(".text.startup"); + RegFn->addFnAttr(Attribute::NoInline); + RegFn->addFnAttr(Attribute::NoUnwind); + + // Create registration function body + auto *BB = BasicBlock::Create(M.getContext(), "entry", RegFn); + ConstantInt *FlagsVal = + ConstantInt::getSigned(Builder.getInt64Ty(), Config.getRequiresFlags()); + Function *RTLRegFn = getOrCreateRuntimeFunctionPtr( + omp::RuntimeFunction::OMPRTL___tgt_register_requires); + + Builder.SetInsertPoint(BB); + Builder.CreateCall(RTLRegFn, {FlagsVal}); + Builder.CreateRetVoid(); + + return RegFn; +} + +//===----------------------------------------------------------------------===// +// OffloadEntriesInfoManager +//===----------------------------------------------------------------------===// + bool OffloadEntriesInfoManager::empty() const { return OffloadEntriesTargetRegion.empty() && OffloadEntriesDeviceGlobalVar.empty(); @@ -6244,6 +6377,10 @@ Action(E.getKey(), E.getValue()); } +//===----------------------------------------------------------------------===// +// CanonicalLoopInfo +//===----------------------------------------------------------------------===// + void CanonicalLoopInfo::collectControlBlocks( SmallVectorImpl &BBs) { // We only count those BBs as control block for which we do not need to diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -5128,7 +5128,7 @@ using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.initialize(); - OpenMPIRBuilderConfig Config(false, false, false, false); + OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false); OMPBuilder.setConfig(Config); F->setName("func"); IRBuilder<> Builder(BB); @@ -5206,7 +5206,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) { OpenMPIRBuilder OMPBuilder(*M); - OMPBuilder.setConfig(OpenMPIRBuilderConfig(true, false, false, false)); + OMPBuilder.setConfig( + OpenMPIRBuilderConfig(true, false, false, false, false, false, false)); OMPBuilder.initialize(); F->setName("func"); @@ -5896,7 +5897,8 @@ TEST_F(OpenMPIRBuilderTest, OffloadEntriesInfoManager) { OpenMPIRBuilder OMPBuilder(*M); - OMPBuilder.setConfig(OpenMPIRBuilderConfig(true, false, false, false)); + OMPBuilder.setConfig( + OpenMPIRBuilderConfig(true, false, false, false, false, false, false)); OffloadEntriesInfoManager &InfoManager = OMPBuilder.OffloadInfoManager; TargetRegionEntryInfo EntryInfo("parent", 1, 2, 4, 0); InfoManager.initializeTargetRegionEntryInfo(EntryInfo, 0); @@ -5919,7 +5921,7 @@ TEST_F(OpenMPIRBuilderTest, registerTargetGlobalVariable) { OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.initialize(); - OpenMPIRBuilderConfig Config(false, false, false, false); + OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false); OMPBuilder.setConfig(Config); std::vector TargetTriple; @@ -5996,8 +5998,11 @@ OMPBuilder.initialize(); OpenMPIRBuilderConfig Config(/* IsTargetDevice = */ true, /* IsGPU = */ true, + /* OpenMPOffloadMandatory = */ false, + /* HasRequiresReverseOffload = */ false, + /* HasRequiresUnifiedAddress = */ false, /* HasRequiresUnifiedSharedMemory = */ false, - /* OpenMPOffloadMandatory = */ false); + /* HasRequiresDynamicAllocators = */ false); OMPBuilder.setConfig(Config); FunctionCallee FnTypeAndCallee = @@ -6033,4 +6038,44 @@ EXPECT_TRUE(Fn->hasFnAttribute(Attribute::MustProgress)); } +TEST_F(OpenMPIRBuilderTest, CreateRegisterRequires) { + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + + OpenMPIRBuilderConfig Config(/* IsTargetDevice = */ false, + /* IsGPU = */ false, + /* OpenMPOffloadMandatory = */ false, + /* HasRequiresReverseOffload = */ true, + /* HasRequiresUnifiedAddress = */ false, + /* HasRequiresUnifiedSharedMemory = */ true, + /* HasRequiresDynamicAllocators = */ false); + OMPBuilder.setConfig(Config); + + auto FName = + OMPBuilder.createPlatformSpecificName({"omp_offloading", "requires_reg"}); + EXPECT_EQ(FName, ".omp_offloading.requires_reg"); + + Function *Fn = OMPBuilder.createRegisterRequires(FName); + EXPECT_NE(Fn, nullptr); + EXPECT_EQ(FName, Fn->getName()); + + EXPECT_EQ(Fn->getSection(), ".text.startup"); + EXPECT_TRUE(Fn->hasInternalLinkage()); + EXPECT_TRUE(Fn->hasFnAttribute(Attribute::NoInline)); + EXPECT_TRUE(Fn->hasFnAttribute(Attribute::NoUnwind)); + EXPECT_EQ(Fn->size(), 1u); + + BasicBlock *Entry = &Fn->getEntryBlock(); + EXPECT_FALSE(Entry->empty()); + EXPECT_EQ(Fn->getReturnType()->getTypeID(), Type::VoidTyID); + + CallInst *Call = &cast(*Entry->begin()); + EXPECT_EQ(Call->getCalledFunction()->getName(), "__tgt_register_requires"); + EXPECT_EQ(Call->getNumOperands(), 2u); + + Value *Flags = Call->getArgOperand(0); + EXPECT_EQ(cast(Flags)->getSExtValue(), + OMPBuilder.Config.getRequiresFlags()); +} + } // namespace diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -1298,8 +1298,11 @@ // TODO: set the flags when available llvm::OpenMPIRBuilderConfig config( isTargetDevice, isGPU, - /* HasRequiresUnifiedSharedMemory */ false, - /* OpenMPOffloadMandatory */ false); + /* OpenMPOffloadMandatory = */ false, + /* HasRequiresReverseOffload = */ false, + /* HasRequiresUnifiedAddress = */ false, + /* HasRequiresUnifiedSharedMemory = */ false, + /* HasRequiresDynamicAllocators = */ false); ompBuilder->setConfig(config); } return ompBuilder.get();