Index: llvm/include/llvm/Transforms/IPO/OpenMPOpt.h =================================================================== --- llvm/include/llvm/Transforms/IPO/OpenMPOpt.h +++ llvm/include/llvm/Transforms/IPO/OpenMPOpt.h @@ -9,14 +9,262 @@ #ifndef LLVM_TRANSFORMS_IPO_OPENMP_OPT_H #define LLVM_TRANSFORMS_IPO_OPENMP_OPT_H +#include "llvm/ADT/EnumeratedArray.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Frontend/OpenMP/OMPConstants.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" +#include "llvm/Transforms/IPO/Attributor.h" +#include "llvm/Transforms/Utils/CallGraphUpdater.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/IR/PassManager.h" +#include "llvm/Analysis/MemorySSA.h" namespace llvm { namespace omp { +using namespace types; + +/// OpenMP specific information. For now, stores RFIs and ICVs also needed for +/// Attributor runs. +struct OMPInformationCache : public InformationCache { + OMPInformationCache(Module &M, AnalysisGetter &AG, + BumpPtrAllocator &Allocator, SetVector *CGSCC, + SmallPtrSetImpl &ModuleSlice) + : InformationCache(M, AG, Allocator, CGSCC), ModuleSlice(ModuleSlice), + OMPBuilder(M) { + OMPBuilder.initialize(); + initializeRuntimeFunctions(); + initializeInternalControlVars(); + } + + /// Generic information that describes an internal control variable. + struct InternalControlVarInfo { + /// The kind, as described by InternalControlVar enum. + InternalControlVar Kind; + + /// The name of the ICV. + StringRef Name; + + /// Environment variable associated with this ICV. + StringRef EnvVarName; + + /// Initial value kind. + ICVInitValue InitKind; + + /// Initial value. + ConstantInt *InitValue; + + /// Setter RTL function associated with this ICV. + RuntimeFunction Setter; + + /// Getter RTL function associated with this ICV. + RuntimeFunction Getter; + + /// RTL Function corresponding to the override clause of this ICV + RuntimeFunction Clause; + }; + + /// Generic information that describes a runtime function + struct RuntimeFunctionInfo { + + /// The kind, as described by the RuntimeFunction enum. + RuntimeFunction Kind; + + /// The name of the function. + StringRef Name; + + /// Flag to indicate a variadic function. + bool IsVarArg; + + /// The return type of the function. + Type *ReturnType; + + /// The argument types of the function. + SmallVector ArgumentTypes; + + /// The declaration if available. + Function *Declaration = nullptr; + + /// Uses of this runtime function per function containing the use. + using UseVector = SmallVector; + + /// Return the vector of uses in function \p F. + UseVector &getOrCreateUseVector(Function *F) { + std::shared_ptr &UV = UsesMap[F]; + if (!UV) + UV = std::make_shared(); + return *UV; + } + + /// Return the vector of uses in function \p F or `nullptr` if there are + /// none. + const UseVector *getUseVector(Function &F) const { + auto I = UsesMap.find(&F); + if (I != UsesMap.end()) + return I->second.get(); + return nullptr; + } + + /// Return how many functions contain uses of this runtime function. + size_t getNumFunctionsWithUses() const { return UsesMap.size(); } + + /// Return the number of arguments (or the minimal number for variadic + /// functions). + size_t getNumArgs() const { return ArgumentTypes.size(); } + + /// Run the callback \p CB on each use and forget the use if the result is + /// true. The callback will be fed the function in which the use was + /// encountered as second argument. + void foreachUse(function_ref CB) { + for (auto &It : UsesMap) + foreachUse(CB, It.first, It.second.get()); + } + + /// Run the callback \p CB on each use within the function \p F and forget + /// the use if the result is true. + void foreachUse(function_ref CB, Function *F, + UseVector *Uses = nullptr); + + private: + /// Map from functions to all uses of this runtime function contained in + /// them. + DenseMap> UsesMap; + }; + + /// The slice of the module we are allowed to look at. + SmallPtrSetImpl &ModuleSlice; + + /// An OpenMP-IR-Builder instance + OpenMPIRBuilder OMPBuilder; + + /// Map from runtime function kind to the runtime function description. + EnumeratedArray + RFIs; + + /// Map from ICV kind to the ICV description. + EnumeratedArray + ICVs; + + /// Helper to initialize all internal control variable information for those + /// defined in OMPKinds.def. + void initializeInternalControlVars(); + + /// Helper to initialize all runtime function information for those defined + /// in OpenMPKinds.def. + void initializeRuntimeFunctions(); + + /// Returns true if the function declaration \p F matches the runtime + /// function types, that is, return type \p RTFRetType, and argument types + /// \p RTFArgTypes. + static bool declMatchesRTFTypes(Function *F, Type *RTFRetType, + SmallVector &RTFArgTypes); +}; + +struct OpenMPOpt { + + using OptimizationRemarkGetter = + function_ref; + + OpenMPOpt(SmallVectorImpl &SCC, CallGraphUpdater &CGUpdater, + OptimizationRemarkGetter OREGetter, + OMPInformationCache &OMPInfoCache, Attributor &A) + : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater), + OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {} + + /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. + bool run(); + + /// Return the call if \p U is a callee use in a regular call. If \p RFI is + /// given it has to be the callee or a nullptr is returned. + static CallInst *getCallIfRegularCall( + Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr); + + /// Return the call if \p V is a regular call. If \p RFI is given it has to be + /// the callee or a nullptr is returned. + static CallInst *getCallIfRegularCall( + Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr); + +private: + /// Try to delete parallel regions if possible. + bool deleteParallelRegions(); + + /// Try to eliminiate runtime calls by reusing existing ones. + bool deduplicateRuntimeCalls(); + + static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent, + bool GlobalOnly, bool &SingleChoice); + + /// Return an `struct ident_t*` value that represents the ones used in the + /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not + /// return a local `struct ident_t*`. For now, if we cannot find a suitable + /// return value we create one from scratch. We also do not yet combine + /// information, e.g., the source locations, see combinedIdentStruct. + Value * + getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI, + Function &F, bool GlobalOnly); + + /// Try to eliminiate calls of \p RFI in \p F by reusing an existing one or + /// \p ReplVal if given. + bool deduplicateRuntimeCalls(Function &F, + OMPInformationCache::RuntimeFunctionInfo &RFI, + Value *ReplVal = nullptr); + + /// Collect arguments that represent the global thread id in \p GTIdArgs. + void collectGlobalThreadIdArguments(SmallSetVector >IdArgs); + + /// Emit a remark generically + /// + /// This template function can be used to generically emit a remark. The + /// RemarkKind should be one of the following: + /// - OptimizationRemark to indicate a successful optimization attempt + /// - OptimizationRemarkMissed to report a failed optimization attempt + /// - OptimizationRemarkAnalysis to provide additional information about an + /// optimization attempt + /// + /// The remark is built using a callback function provided by the caller that + /// takes a RemarkKind as input and returns a RemarkKind. + template > + void emitRemark(Instruction *Inst, StringRef RemarkName, + RemarkCallBack &&RemarkCB); + + /// Emit a remark on a function. Since only OptimizationRemark is supporting + /// this, it can't be made generic. + void emitRemarkOnFunction( + Function *F, StringRef RemarkName, + function_ref &&RemarkCB); + + /// The underyling module. + Module &M; + + /// The SCC we are operating on. + SmallVectorImpl &SCC; + + /// Callback to update the call graph, the first argument is a removed call, + /// the second an optional replacement call. + CallGraphUpdater &CGUpdater; + + /// Callback to get an OptimizationRemarkEmitter from a Function * + OptimizationRemarkGetter OREGetter; + + /// OpenMP-specific information cache. Also Used for Attributor runs. + OMPInformationCache &OMPInfoCache; + + /// Attributor instance. + Attributor &A; + + /// Helper function to run Attributor on SCC. + bool runAttributor(); + + /// Populate the Attributor with abstract attribute opportunities in the + /// function. + void registerAAs(); +}; + /// Helper to remember if the module contains OpenMP (runtime calls), to be used /// foremost with containsOpenMP. struct OpenMPInModule { Index: llvm/lib/Transforms/IPO/OpenMPOpt.cpp =================================================================== --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -91,156 +91,34 @@ } // end namespace llvm -namespace { - -struct AAICVTracker; - -/// OpenMP specific information. For now, stores RFIs and ICVs also needed for -/// Attributor runs. -struct OMPInformationCache : public InformationCache { - OMPInformationCache(Module &M, AnalysisGetter &AG, - BumpPtrAllocator &Allocator, SetVector *CGSCC, - SmallPtrSetImpl &ModuleSlice) - : InformationCache(M, AG, Allocator, CGSCC), ModuleSlice(ModuleSlice), - OMPBuilder(M) { - OMPBuilder.initialize(); - initializeRuntimeFunctions(); - initializeInternalControlVars(); - } - - /// Generic information that describes an internal control variable. - struct InternalControlVarInfo { - /// The kind, as described by InternalControlVar enum. - InternalControlVar Kind; - - /// The name of the ICV. - StringRef Name; - - /// Environment variable associated with this ICV. - StringRef EnvVarName; - - /// Initial value kind. - ICVInitValue InitKind; - - /// Initial value. - ConstantInt *InitValue; - - /// Setter RTL function associated with this ICV. - RuntimeFunction Setter; - - /// Getter RTL function associated with this ICV. - RuntimeFunction Getter; - - /// RTL Function corresponding to the override clause of this ICV - RuntimeFunction Clause; - }; - - /// Generic information that describes a runtime function - struct RuntimeFunctionInfo { - - /// The kind, as described by the RuntimeFunction enum. - RuntimeFunction Kind; - - /// The name of the function. - StringRef Name; - - /// Flag to indicate a variadic function. - bool IsVarArg; - - /// The return type of the function. - Type *ReturnType; - - /// The argument types of the function. - SmallVector ArgumentTypes; - - /// The declaration if available. - Function *Declaration = nullptr; - - /// Uses of this runtime function per function containing the use. - using UseVector = SmallVector; - - /// Return the vector of uses in function \p F. - UseVector &getOrCreateUseVector(Function *F) { - std::shared_ptr &UV = UsesMap[F]; - if (!UV) - UV = std::make_shared(); - return *UV; - } - - /// Return the vector of uses in function \p F or `nullptr` if there are - /// none. - const UseVector *getUseVector(Function &F) const { - auto I = UsesMap.find(&F); - if (I != UsesMap.end()) - return I->second.get(); - return nullptr; - } - - /// Return how many functions contain uses of this runtime function. - size_t getNumFunctionsWithUses() const { return UsesMap.size(); } - - /// Return the number of arguments (or the minimal number for variadic - /// functions). - size_t getNumArgs() const { return ArgumentTypes.size(); } - - /// Run the callback \p CB on each use and forget the use if the result is - /// true. The callback will be fed the function in which the use was - /// encountered as second argument. - void foreachUse(function_ref CB) { - for (auto &It : UsesMap) - foreachUse(CB, It.first, It.second.get()); - } - - /// Run the callback \p CB on each use within the function \p F and forget - /// the use if the result is true. - void foreachUse(function_ref CB, Function *F, - UseVector *Uses = nullptr) { - SmallVector ToBeDeleted; - ToBeDeleted.clear(); - - unsigned Idx = 0; - UseVector &UV = Uses ? *Uses : getOrCreateUseVector(F); - - for (Use *U : UV) { - if (CB(*U, *F)) - ToBeDeleted.push_back(Idx); - ++Idx; - } - - // Remove the to-be-deleted indices in reverse order as prior - // modifcations will not modify the smaller indices. - while (!ToBeDeleted.empty()) { - unsigned Idx = ToBeDeleted.pop_back_val(); - UV[Idx] = UV.back(); - UV.pop_back(); - } - } - - private: - /// Map from functions to all uses of this runtime function contained in - /// them. - DenseMap> UsesMap; - }; +//===----------------------------------------------------------------------===// +// Definitions of the OMPInformationCache helper structure. +//===----------------------------------------------------------------------===// - /// The slice of the module we are allowed to look at. - SmallPtrSetImpl &ModuleSlice; +void OMPInformationCache::RuntimeFunctionInfo::foreachUse( + function_ref CB, Function *F, UseVector *Uses) { + SmallVector ToBeDeleted; + ToBeDeleted.clear(); - /// An OpenMP-IR-Builder instance - OpenMPIRBuilder OMPBuilder; + unsigned Idx = 0; + UseVector &UV = Uses ? *Uses : getOrCreateUseVector(F); - /// Map from runtime function kind to the runtime function description. - EnumeratedArray - RFIs; + for (Use *U : UV) { + if (CB(*U, *F)) + ToBeDeleted.push_back(Idx); + ++Idx; + } - /// Map from ICV kind to the ICV description. - EnumeratedArray - ICVs; + // Remove the to-be-deleted indices in reverse order as prior + // modifcations will not modify the smaller indices. + while (!ToBeDeleted.empty()) { + unsigned Idx = ToBeDeleted.pop_back_val(); + UV[Idx] = UV.back(); + UV.pop_back(); + } +} - /// Helper to initialize all internal control variable information for those - /// defined in OMPKinds.def. - void initializeInternalControlVars() { +void OMPInformationCache::initializeInternalControlVars() { #define ICV_RT_SET(_Name, RTL) \ { \ auto &ICV = ICVs[_Name]; \ @@ -274,63 +152,35 @@ } \ } #include "llvm/Frontend/OpenMP/OMPKinds.def" - } - - /// Returns true if the function declaration \p F matches the runtime - /// function types, that is, return type \p RTFRetType, and argument types - /// \p RTFArgTypes. - static bool declMatchesRTFTypes(Function *F, Type *RTFRetType, - SmallVector &RTFArgTypes) { - // TODO: We should output information to the user (under debug output - // and via remarks). - - if (!F) - return false; - if (F->getReturnType() != RTFRetType) - return false; - if (F->arg_size() != RTFArgTypes.size()) - return false; - - auto RTFTyIt = RTFArgTypes.begin(); - for (Argument &Arg : F->args()) { - if (Arg.getType() != *RTFTyIt) - return false; +} - ++RTFTyIt; - } +void OMPInformationCache::initializeRuntimeFunctions() { + // Helper to collect all uses of the decleration in the UsesMap. + auto CollectUses = [&](RuntimeFunctionInfo &RFI) { + unsigned NumUses = 0; + if (!RFI.Declaration) + return NumUses; + OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration); - return true; - } + NumOpenMPRuntimeFunctionsIdentified += 1; + NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses(); - /// Helper to initialize all runtime function information for those defined - /// in OpenMPKinds.def. - void initializeRuntimeFunctions() { - // Helper to collect all uses of the decleration in the UsesMap. - auto CollectUses = [&](RuntimeFunctionInfo &RFI) { - unsigned NumUses = 0; - if (!RFI.Declaration) - return NumUses; - OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration); - - NumOpenMPRuntimeFunctionsIdentified += 1; - NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses(); - - // TODO: We directly convert uses into proper calls and unknown uses. - for (Use &U : RFI.Declaration->uses()) { - if (Instruction *UserI = dyn_cast(U.getUser())) { - if (ModuleSlice.count(UserI->getFunction())) { - RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U); - ++NumUses; - } - } else { - RFI.getOrCreateUseVector(nullptr).push_back(&U); + // TODO: We directly convert uses into proper calls and unknown uses. + for (Use &U : RFI.Declaration->uses()) { + if (Instruction *UserI = dyn_cast(U.getUser())) { + if (ModuleSlice.count(UserI->getFunction())) { + RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U); ++NumUses; } + } else { + RFI.getOrCreateUseVector(nullptr).push_back(&U); + ++NumUses; } - return NumUses; - }; + } + return NumUses; + }; - Module &M = *((*ModuleSlice.begin())->getParent()); + Module &M = *((*ModuleSlice.begin())->getParent()); #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \ { \ @@ -358,442 +208,36 @@ } #include "llvm/Frontend/OpenMP/OMPKinds.def" - // TODO: We should attach the attributes defined in OMPKinds.def. - } -}; - -struct OpenMPOpt { - - using OptimizationRemarkGetter = - function_ref; - - OpenMPOpt(SmallVectorImpl &SCC, CallGraphUpdater &CGUpdater, - OptimizationRemarkGetter OREGetter, - OMPInformationCache &OMPInfoCache, Attributor &A) - : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater), - OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {} - - /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. - bool run() { - bool Changed = false; - - LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size() - << " functions in a slice with " - << OMPInfoCache.ModuleSlice.size() << " functions\n"); - - /// Print initial ICV values for testing. - /// FIXME: This should be done from the Attributor once it is added. - if (PrintICVValues) { - InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel}; - - for (Function *F : OMPInfoCache.ModuleSlice) { - for (auto ICV : ICVs) { - auto ICVInfo = OMPInfoCache.ICVs[ICV]; - auto Remark = [&](OptimizationRemark OR) { - return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) - << " Value: " - << (ICVInfo.InitValue - ? ICVInfo.InitValue->getValue().toString(10, true) - : "IMPLEMENTATION_DEFINED"); - }; - - emitRemarkOnFunction(F, "OpenMPICVTracker", Remark); - } - } - } - - Changed |= runAttributor(); - Changed |= deduplicateRuntimeCalls(); - Changed |= deleteParallelRegions(); - - return Changed; - } - - /// Return the call if \p U is a callee use in a regular call. If \p RFI is - /// given it has to be the callee or a nullptr is returned. - static CallInst *getCallIfRegularCall( - Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { - CallInst *CI = dyn_cast(U.getUser()); - if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() && - (!RFI || CI->getCalledFunction() == RFI->Declaration)) - return CI; - return nullptr; - } - - /// Return the call if \p V is a regular call. If \p RFI is given it has to be - /// the callee or a nullptr is returned. - static CallInst *getCallIfRegularCall( - Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { - CallInst *CI = dyn_cast(&V); - if (CI && !CI->hasOperandBundles() && - (!RFI || CI->getCalledFunction() == RFI->Declaration)) - return CI; - return nullptr; - } - -private: - /// Try to delete parallel regions if possible. - bool deleteParallelRegions() { - const unsigned CallbackCalleeOperand = 2; - - OMPInformationCache::RuntimeFunctionInfo &RFI = - OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; - - if (!RFI.Declaration) - return false; - - bool Changed = false; - auto DeleteCallCB = [&](Use &U, Function &) { - CallInst *CI = getCallIfRegularCall(U); - if (!CI) - return false; - auto *Fn = dyn_cast( - CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts()); - if (!Fn) - return false; - if (!Fn->onlyReadsMemory()) - return false; - if (!Fn->hasFnAttribute(Attribute::WillReturn)) - return false; - - LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in " - << CI->getCaller()->getName() << "\n"); - - auto Remark = [&](OptimizationRemark OR) { - return OR << "Parallel region in " - << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName()) - << " deleted"; - }; - emitRemark(CI, "OpenMPParallelRegionDeletion", - Remark); - - CGUpdater.removeCallSite(*CI); - CI->eraseFromParent(); - Changed = true; - ++NumOpenMPParallelRegionsDeleted; - return true; - }; - - RFI.foreachUse(DeleteCallCB); - - return Changed; - } - - /// Try to eliminiate runtime calls by reusing existing ones. - bool deduplicateRuntimeCalls() { - bool Changed = false; - - RuntimeFunction DeduplicableRuntimeCallIDs[] = { - OMPRTL_omp_get_num_threads, - OMPRTL_omp_in_parallel, - OMPRTL_omp_get_cancellation, - OMPRTL_omp_get_thread_limit, - OMPRTL_omp_get_supported_active_levels, - OMPRTL_omp_get_level, - OMPRTL_omp_get_ancestor_thread_num, - OMPRTL_omp_get_team_size, - OMPRTL_omp_get_active_level, - OMPRTL_omp_in_final, - OMPRTL_omp_get_proc_bind, - OMPRTL_omp_get_num_places, - OMPRTL_omp_get_num_procs, - OMPRTL_omp_get_place_num, - OMPRTL_omp_get_partition_num_places, - OMPRTL_omp_get_partition_place_nums}; - - // Global-tid is handled separately. - SmallSetVector GTIdArgs; - collectGlobalThreadIdArguments(GTIdArgs); - LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size() - << " global thread ID arguments\n"); - - for (Function *F : SCC) { - for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs) - deduplicateRuntimeCalls(*F, - OMPInfoCache.RFIs[DeduplicableRuntimeCallID]); - - // __kmpc_global_thread_num is special as we can replace it with an - // argument in enough cases to make it worth trying. - Value *GTIdArg = nullptr; - for (Argument &Arg : F->args()) - if (GTIdArgs.count(&Arg)) { - GTIdArg = &Arg; - break; - } - Changed |= deduplicateRuntimeCalls( - *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg); - } - - return Changed; - } - - static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent, - bool GlobalOnly, bool &SingleChoice) { - if (CurrentIdent == NextIdent) - return CurrentIdent; - - // TODO: Figure out how to actually combine multiple debug locations. For - // now we just keep an existing one if there is a single choice. - if (!GlobalOnly || isa(NextIdent)) { - SingleChoice = !CurrentIdent; - return NextIdent; - } - return nullptr; - } - - /// Return an `struct ident_t*` value that represents the ones used in the - /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not - /// return a local `struct ident_t*`. For now, if we cannot find a suitable - /// return value we create one from scratch. We also do not yet combine - /// information, e.g., the source locations, see combinedIdentStruct. - Value * - getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI, - Function &F, bool GlobalOnly) { - bool SingleChoice = true; - Value *Ident = nullptr; - auto CombineIdentStruct = [&](Use &U, Function &Caller) { - CallInst *CI = getCallIfRegularCall(U, &RFI); - if (!CI || &F != &Caller) - return false; - Ident = combinedIdentStruct(Ident, CI->getArgOperand(0), - /* GlobalOnly */ true, SingleChoice); - return false; - }; - RFI.foreachUse(CombineIdentStruct); - - if (!Ident || !SingleChoice) { - // The IRBuilder uses the insertion block to get to the module, this is - // unfortunate but we work around it for now. - if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock()) - OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy( - &F.getEntryBlock(), F.getEntryBlock().begin())); - // Create a fallback location if non was found. - // TODO: Use the debug locations of the calls instead. - Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(); - Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc); - } - return Ident; - } - - /// Try to eliminiate calls of \p RFI in \p F by reusing an existing one or - /// \p ReplVal if given. - bool deduplicateRuntimeCalls(Function &F, - OMPInformationCache::RuntimeFunctionInfo &RFI, - Value *ReplVal = nullptr) { - auto *UV = RFI.getUseVector(F); - if (!UV || UV->size() + (ReplVal != nullptr) < 2) - return false; - - LLVM_DEBUG( - dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name - << (ReplVal ? " with an existing value\n" : "\n") << "\n"); - - assert((!ReplVal || (isa(ReplVal) && - cast(ReplVal)->getParent() == &F)) && - "Unexpected replacement value!"); - - // TODO: Use dominance to find a good position instead. - auto CanBeMoved = [](CallBase &CB) { - unsigned NumArgs = CB.getNumArgOperands(); - if (NumArgs == 0) - return true; - if (CB.getArgOperand(0)->getType() != IdentPtr) - return false; - for (unsigned u = 1; u < NumArgs; ++u) - if (isa(CB.getArgOperand(u))) - return false; - return true; - }; - - if (!ReplVal) { - for (Use *U : *UV) - if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { - if (!CanBeMoved(*CI)) - continue; - - auto Remark = [&](OptimizationRemark OR) { - auto newLoc = &*F.getEntryBlock().getFirstInsertionPt(); - return OR << "OpenMP runtime call " - << ore::NV("OpenMPOptRuntime", RFI.Name) << " moved to " - << ore::NV("OpenMPRuntimeMoves", newLoc->getDebugLoc()); - }; - emitRemark(CI, "OpenMPRuntimeCodeMotion", Remark); - - CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); - ReplVal = CI; - break; - } - if (!ReplVal) - return false; - } - - // If we use a call as a replacement value we need to make sure the ident is - // valid at the new location. For now we just pick a global one, either - // existing and used by one of the calls, or created from scratch. - if (CallBase *CI = dyn_cast(ReplVal)) { - if (CI->getNumArgOperands() > 0 && - CI->getArgOperand(0)->getType() == IdentPtr) { - Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F, - /* GlobalOnly */ true); - CI->setArgOperand(0, Ident); - } - } - - bool Changed = false; - auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { - CallInst *CI = getCallIfRegularCall(U, &RFI); - if (!CI || CI == ReplVal || &F != &Caller) - return false; - assert(CI->getCaller() == &F && "Unexpected call!"); - - auto Remark = [&](OptimizationRemark OR) { - return OR << "OpenMP runtime call " - << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated"; - }; - emitRemark(CI, "OpenMPRuntimeDeduplicated", Remark); - - CGUpdater.removeCallSite(*CI); - CI->replaceAllUsesWith(ReplVal); - CI->eraseFromParent(); - ++NumOpenMPRuntimeCallsDeduplicated; - Changed = true; - return true; - }; - RFI.foreachUse(ReplaceAndDeleteCB); - - return Changed; - } - - /// Collect arguments that represent the global thread id in \p GTIdArgs. - void collectGlobalThreadIdArguments(SmallSetVector >IdArgs) { - // TODO: Below we basically perform a fixpoint iteration with a pessimistic - // initialization. We could define an AbstractAttribute instead and - // run the Attributor here once it can be run as an SCC pass. - - // Helper to check the argument \p ArgNo at all call sites of \p F for - // a GTId. - auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) { - if (!F.hasLocalLinkage()) - return false; - for (Use &U : F.uses()) { - if (CallInst *CI = getCallIfRegularCall(U)) { - Value *ArgOp = CI->getArgOperand(ArgNo); - if (CI == &RefCI || GTIdArgs.count(ArgOp) || - getCallIfRegularCall( - *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num])) - continue; - } - return false; - } - return true; - }; - - // Helper to identify uses of a GTId as GTId arguments. - auto AddUserArgs = [&](Value >Id) { - for (Use &U : GTId.uses()) - if (CallInst *CI = dyn_cast(U.getUser())) - if (CI->isArgOperand(&U)) - if (Function *Callee = CI->getCalledFunction()) - if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI)) - GTIdArgs.insert(Callee->getArg(U.getOperandNo())); - }; - - // The argument users of __kmpc_global_thread_num calls are GTIds. - OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI = - OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]; - - GlobThreadNumRFI.foreachUse([&](Use &U, Function &F) { - if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI)) - AddUserArgs(*CI); - return false; - }); - - // Transitively search for more arguments by looking at the users of the - // ones we know already. During the search the GTIdArgs vector is extended - // so we cannot cache the size nor can we use a range based for. - for (unsigned u = 0; u < GTIdArgs.size(); ++u) - AddUserArgs(*GTIdArgs[u]); - } - - /// Emit a remark generically - /// - /// This template function can be used to generically emit a remark. The - /// RemarkKind should be one of the following: - /// - OptimizationRemark to indicate a successful optimization attempt - /// - OptimizationRemarkMissed to report a failed optimization attempt - /// - OptimizationRemarkAnalysis to provide additional information about an - /// optimization attempt - /// - /// The remark is built using a callback function provided by the caller that - /// takes a RemarkKind as input and returns a RemarkKind. - template > - void emitRemark(Instruction *Inst, StringRef RemarkName, - RemarkCallBack &&RemarkCB) { - Function *F = Inst->getParent()->getParent(); - auto &ORE = OREGetter(F); - - ORE.emit( - [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); }); - } - - /// Emit a remark on a function. Since only OptimizationRemark is supporting - /// this, it can't be made generic. - void emitRemarkOnFunction( - Function *F, StringRef RemarkName, - function_ref &&RemarkCB) { - auto &ORE = OREGetter(F); - - ORE.emit([&]() { - return RemarkCB(OptimizationRemark(DEBUG_TYPE, RemarkName, F)); - }); - } - - /// The underyling module. - Module &M; - - /// The SCC we are operating on. - SmallVectorImpl &SCC; - - /// Callback to update the call graph, the first argument is a removed call, - /// the second an optional replacement call. - CallGraphUpdater &CGUpdater; - - /// Callback to get an OptimizationRemarkEmitter from a Function * - OptimizationRemarkGetter OREGetter; + // TODO: We should attach the attributes defined in OMPKinds.def. +} - /// OpenMP-specific information cache. Also Used for Attributor runs. - OMPInformationCache &OMPInfoCache; +bool OMPInformationCache::declMatchesRTFTypes( + Function *F, Type *RTFRetType, SmallVector &RTFArgTypes) { + // TODO: We should output information to the user (under debug output + // and via remarks). - /// Attributor instance. - Attributor &A; + if (!F) + return false; + if (F->getReturnType() != RTFRetType) + return false; + if (F->arg_size() != RTFArgTypes.size()) + return false; - /// Helper function to run Attributor on SCC. - bool runAttributor() { - if (SCC.empty()) + auto RTFTyIt = RTFArgTypes.begin(); + for (Argument &Arg : F->args()) { + if (Arg.getType() != *RTFTyIt) return false; - registerAAs(); - - ChangeStatus Changed = A.run(); - - LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size() - << " functions, result: " << Changed << ".\n"); - - return Changed == ChangeStatus::CHANGED; + ++RTFTyIt; } - /// Populate the Attributor with abstract attribute opportunities in the - /// function. - void registerAAs() { - for (Function *F : SCC) { - if (F->isDeclaration()) - continue; + return true; +} - A.getOrCreateAAFor(IRPosition::function(*F)); - } - } -}; +//===----------------------------------------------------------------------===// +// Declarations and definitions of AAICVTracker. +//===----------------------------------------------------------------------===// +namespace { /// Abstract Attribute for tracking ICV values. struct AAICVTracker : public StateWrapper { @@ -869,7 +313,7 @@ // Map of ICV to their values at specific program point. EnumeratedArray, InternalControlVar, - InternalControlVar::ICV___last> + InternalControlVar::ICV___last> ICVValuesMap; // Currently only nthreads is being tracked. @@ -964,6 +408,385 @@ return *AA; } +//===----------------------------------------------------------------------===// +// Definitions of the OpenMPOpt structure. +//===----------------------------------------------------------------------===// + +bool OpenMPOpt::run() { + bool Changed = false; + + LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size() + << " functions in a slice with " + << OMPInfoCache.ModuleSlice.size() << " functions\n"); + + /// Print initial ICV values for testing. + /// FIXME: This should be done from the Attributor once it is added. + if (PrintICVValues) { + InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel}; + + for (Function *F : OMPInfoCache.ModuleSlice) { + for (auto ICV : ICVs) { + auto ICVInfo = OMPInfoCache.ICVs[ICV]; + auto Remark = [&](OptimizationRemark OR) { + return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) + << " Value: " + << (ICVInfo.InitValue + ? ICVInfo.InitValue->getValue().toString(10, true) + : "IMPLEMENTATION_DEFINED"); + }; + + emitRemarkOnFunction(F, "OpenMPICVTracker", Remark); + } + } + } + + Changed |= runAttributor(); + Changed |= deduplicateRuntimeCalls(); + Changed |= deleteParallelRegions(); + + return Changed; +} + +CallInst *OpenMPOpt::getCallIfRegularCall( + Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI) { + CallInst *CI = dyn_cast(U.getUser()); + if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() && + (!RFI || CI->getCalledFunction() == RFI->Declaration)) + return CI; + return nullptr; +} + +CallInst *OpenMPOpt::getCallIfRegularCall( + Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI) { + CallInst *CI = dyn_cast(&V); + if (CI && !CI->hasOperandBundles() && + (!RFI || CI->getCalledFunction() == RFI->Declaration)) + return CI; + return nullptr; +} + +bool OpenMPOpt::deleteParallelRegions() { + const unsigned CallbackCalleeOperand = 2; + + OMPInformationCache::RuntimeFunctionInfo &RFI = + OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; + + if (!RFI.Declaration) + return false; + + bool Changed = false; + auto DeleteCallCB = [&](Use &U, Function &) { + CallInst *CI = getCallIfRegularCall(U); + if (!CI) + return false; + auto *Fn = dyn_cast( + CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts()); + if (!Fn) + return false; + if (!Fn->onlyReadsMemory()) + return false; + if (!Fn->hasFnAttribute(Attribute::WillReturn)) + return false; + + LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in " + << CI->getCaller()->getName() << "\n"); + + auto Remark = [&](OptimizationRemark OR) { + return OR << "Parallel region in " + << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName()) + << " deleted"; + }; + emitRemark(CI, "OpenMPParallelRegionDeletion", + Remark); + + CGUpdater.removeCallSite(*CI); + CI->eraseFromParent(); + Changed = true; + ++NumOpenMPParallelRegionsDeleted; + return true; + }; + + RFI.foreachUse(DeleteCallCB); + + return Changed; +} + +bool OpenMPOpt::deduplicateRuntimeCalls() { + bool Changed = false; + + RuntimeFunction DeduplicableRuntimeCallIDs[] = { + OMPRTL_omp_get_num_threads, + OMPRTL_omp_in_parallel, + OMPRTL_omp_get_cancellation, + OMPRTL_omp_get_thread_limit, + OMPRTL_omp_get_supported_active_levels, + OMPRTL_omp_get_level, + OMPRTL_omp_get_ancestor_thread_num, + OMPRTL_omp_get_team_size, + OMPRTL_omp_get_active_level, + OMPRTL_omp_in_final, + OMPRTL_omp_get_proc_bind, + OMPRTL_omp_get_num_places, + OMPRTL_omp_get_num_procs, + OMPRTL_omp_get_place_num, + OMPRTL_omp_get_partition_num_places, + OMPRTL_omp_get_partition_place_nums}; + + // Global-tid is handled separately. + SmallSetVector GTIdArgs; + collectGlobalThreadIdArguments(GTIdArgs); + LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size() + << " global thread ID arguments\n"); + + for (Function *F : SCC) { + for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs) + deduplicateRuntimeCalls(*F, + OMPInfoCache.RFIs[DeduplicableRuntimeCallID]); + + // __kmpc_global_thread_num is special as we can replace it with an + // argument in enough cases to make it worth trying. + Value *GTIdArg = nullptr; + for (Argument &Arg : F->args()) + if (GTIdArgs.count(&Arg)) { + GTIdArg = &Arg; + break; + } + Changed |= deduplicateRuntimeCalls( + *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg); + } + + return Changed; +} + +Value *OpenMPOpt::combinedIdentStruct(Value *CurrentIdent, Value *NextIdent, + bool GlobalOnly, bool &SingleChoice) { + if (CurrentIdent == NextIdent) + return CurrentIdent; + + // TODO: Figure out how to actually combine multiple debug locations. For + // now we just keep an existing one if there is a single choice. + if (!GlobalOnly || isa(NextIdent)) { + SingleChoice = !CurrentIdent; + return NextIdent; + } + return nullptr; +} + +Value * OpenMPOpt::getCombinedIdentFromCallUsesIn( + OMPInformationCache::RuntimeFunctionInfo &RFI, + Function &F, bool GlobalOnly) { + bool SingleChoice = true; + Value *Ident = nullptr; + auto CombineIdentStruct = [&](Use &U, Function &Caller) { + CallInst *CI = getCallIfRegularCall(U, &RFI); + if (!CI || &F != &Caller) + return false; + Ident = combinedIdentStruct(Ident, CI->getArgOperand(0), + /* GlobalOnly */ true, SingleChoice); + return false; + }; + RFI.foreachUse(CombineIdentStruct); + + if (!Ident || !SingleChoice) { + // The IRBuilder uses the insertion block to get to the module, this is + // unfortunate but we work around it for now. + if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock()) + OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy( + &F.getEntryBlock(), F.getEntryBlock().begin())); + // Create a fallback location if non was found. + // TODO: Use the debug locations of the calls instead. + Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(); + Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc); + } + return Ident; +} + +bool OpenMPOpt::deduplicateRuntimeCalls( + Function &F, OMPInformationCache::RuntimeFunctionInfo &RFI, + Value *ReplVal) { + auto *UV = RFI.getUseVector(F); + if (!UV || UV->size() + (ReplVal != nullptr) < 2) + return false; + + LLVM_DEBUG( + dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name + << (ReplVal ? " with an existing value\n" : "\n") << "\n"); + + assert((!ReplVal || (isa(ReplVal) && + cast(ReplVal)->getParent() == &F)) && + "Unexpected replacement value!"); + + // TODO: Use dominance to find a good position instead. + auto CanBeMoved = [](CallBase &CB) { + unsigned NumArgs = CB.getNumArgOperands(); + if (NumArgs == 0) + return true; + if (CB.getArgOperand(0)->getType() != IdentPtr) + return false; + for (unsigned u = 1; u < NumArgs; ++u) + if (isa(CB.getArgOperand(u))) + return false; + return true; + }; + + if (!ReplVal) { + for (Use *U : *UV) + if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { + if (!CanBeMoved(*CI)) + continue; + + auto Remark = [&](OptimizationRemark OR) { + auto newLoc = &*F.getEntryBlock().getFirstInsertionPt(); + return OR << "OpenMP runtime call " + << ore::NV("OpenMPOptRuntime", RFI.Name) << " moved to " + << ore::NV("OpenMPRuntimeMoves", newLoc->getDebugLoc()); + }; + emitRemark(CI, "OpenMPRuntimeCodeMotion", Remark); + + CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); + ReplVal = CI; + break; + } + if (!ReplVal) + return false; + } + + // If we use a call as a replacement value we need to make sure the ident is + // valid at the new location. For now we just pick a global one, either + // existing and used by one of the calls, or created from scratch. + if (CallBase *CI = dyn_cast(ReplVal)) { + if (CI->getNumArgOperands() > 0 && + CI->getArgOperand(0)->getType() == IdentPtr) { + Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F, + /* GlobalOnly */ true); + CI->setArgOperand(0, Ident); + } + } + + bool Changed = false; + auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { + CallInst *CI = getCallIfRegularCall(U, &RFI); + if (!CI || CI == ReplVal || &F != &Caller) + return false; + assert(CI->getCaller() == &F && "Unexpected call!"); + + auto Remark = [&](OptimizationRemark OR) { + return OR << "OpenMP runtime call " + << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated"; + }; + emitRemark(CI, "OpenMPRuntimeDeduplicated", Remark); + + CGUpdater.removeCallSite(*CI); + CI->replaceAllUsesWith(ReplVal); + CI->eraseFromParent(); + ++NumOpenMPRuntimeCallsDeduplicated; + Changed = true; + return true; + }; + RFI.foreachUse(ReplaceAndDeleteCB); + + return Changed; +} + +void OpenMPOpt::collectGlobalThreadIdArguments( + SmallSetVector >IdArgs) { + // TODO: Below we basically perform a fixpoint iteration with a pessimistic + // initialization. We could define an AbstractAttribute instead and + // run the Attributor here once it can be run as an SCC pass. + + // Helper to check the argument \p ArgNo at all call sites of \p F for + // a GTId. + auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) { + if (!F.hasLocalLinkage()) + return false; + for (Use &U : F.uses()) { + if (CallInst *CI = getCallIfRegularCall(U)) { + Value *ArgOp = CI->getArgOperand(ArgNo); + if (CI == &RefCI || GTIdArgs.count(ArgOp) || + getCallIfRegularCall( + *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num])) + continue; + } + return false; + } + return true; + }; + + // Helper to identify uses of a GTId as GTId arguments. + auto AddUserArgs = [&](Value >Id) { + for (Use &U : GTId.uses()) + if (CallInst *CI = dyn_cast(U.getUser())) + if (CI->isArgOperand(&U)) + if (Function *Callee = CI->getCalledFunction()) + if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI)) + GTIdArgs.insert(Callee->getArg(U.getOperandNo())); + }; + + // The argument users of __kmpc_global_thread_num calls are GTIds. + OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI = + OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]; + + GlobThreadNumRFI.foreachUse([&](Use &U, Function &F) { + if (CallInst *CI = + getCallIfRegularCall(U, &GlobThreadNumRFI)) + AddUserArgs(*CI); + return false; + }); + + // Transitively search for more arguments by looking at the users of the + // ones we know already. During the search the GTIdArgs vector is extended + // so we cannot cache the size nor can we use a range based for. + for (unsigned u = 0; u < GTIdArgs.size(); ++u) + AddUserArgs(*GTIdArgs[u]); +} + +template +void OpenMPOpt::emitRemark(Instruction *Inst, StringRef RemarkName, + RemarkCallBack &&RemarkCB) { + Function *F = Inst->getParent()->getParent(); + auto &ORE = OREGetter(F); + + ORE.emit( + [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); }); +} + +void OpenMPOpt::emitRemarkOnFunction( + Function *F, StringRef RemarkName, + function_ref &&RemarkCB) { + auto &ORE = OREGetter(F); + + ORE.emit([&]() { + return RemarkCB(OptimizationRemark(DEBUG_TYPE, RemarkName, F)); + }); +} + +bool OpenMPOpt::runAttributor() { + if (SCC.empty()) + return false; + + registerAAs(); + + ChangeStatus Changed = A.run(); + + LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size() + << " functions, result: " << Changed << ".\n"); + + return Changed == ChangeStatus::CHANGED; +} + +void OpenMPOpt::registerAAs() { + for (Function *F : SCC) { + if (F->isDeclaration()) + continue; + + A.getOrCreateAAFor(IRPosition::function(*F)); + } +} + +//===----------------------------------------------------------------------===// +// Definitions of the OpenMPOptPass. +//===----------------------------------------------------------------------===// + PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR) {