diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -48,6 +48,16 @@ #define OMP_CLAUSE(Enum, ...) constexpr auto Enum = omp::Clause::Enum; #include "llvm/Frontend/OpenMP/OMPKinds.def" +/// IDs for all Internal Control Variables (ICVs). +enum class InternalControlVar { +#define ICV_DATA_ENV(Enum, ...) Enum, +#include "llvm/Frontend/OpenMP/OMPKinds.def" +}; + +#define ICV_DATA_ENV(Enum, ...) \ + constexpr auto Enum = omp::InternalControlVar::Enum; +#include "llvm/Frontend/OpenMP/OMPKinds.def" + /// IDs for all omp runtime library (RTL) functions. enum class RuntimeFunction { #define OMP_RTL(Enum, ...) Enum, diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -310,6 +310,62 @@ ///} +/// Internal Control Variables information +/// +///{ + +#ifndef ICV_DATA_ENV +#define ICV_DATA_ENV(Name, EnvVarName, Init) +#endif + +#define __ICV_DATA_ENV(Name, EnvVarName, Init) \ + ICV_DATA_ENV(ICV_##Name, #EnvVarName, Init) + +__ICV_DATA_ENV(nthreads, OMP_NUM_THREADS, IMPLEMENTATION_DEFINED) +__ICV_DATA_ENV(__last, last, LAST) + +#undef __ICV_DATA_ENV +#undef ICV_DATA_ENV + +#ifndef ICV_RT_SET +#define ICV_RT_SET(Name, RTL) +#endif + +#define __ICV_RT_SET(Name, RTL) ICV_RT_SET(ICV_##Name, OMPRTL_##RTL) + +__ICV_RT_SET(nthreads, omp_set_num_threads) + +#undef __ICV_RT_SET +#undef ICV_RT_SET + +#ifndef ICV_RT_GET +#define ICV_RT_GET(Name, RTL) +#endif + +#define __ICV_RT_GET(Name, RTL) ICV_RT_GET(ICV_##Name, OMPRTL_##RTL) + +__ICV_RT_GET(nthreads, omp_get_max_threads) + +#undef __ICV_RT_GET +#undef ICV_RT_GET + +#ifndef ICV_CLAUSE +#define ICV_CLAUSE(Name, Clause) +#endif + +#define __ICV_CLAUSE(Name, Clause) ICV_CLAUSE(ICV_##Name, Clause) + +#undef
__ICV_CLAUSE +#undef ICV_CLAUSE + +// #ifndef ICV_VALUE_TYPE +// #define ICV_VALUE_TYPE +// #endif + +// #define __ICV_VALUE_TYPE(Name, Type) ICV_VALUE_TYPE(Name, Type) + +///} + /// Runtime library function (and their attributes) /// ///{ diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -814,10 +814,10 @@ /// the abstract attributes. /// \param CGUpdater Helper to update an underlying call graph. /// \param Whitelist If not null, a set limiting the attribute opportunities. - Attributor(SetVector &Functions, InformationCache &InfoCache, + Attributor(SetVector &Functions, InformationCache *InfoCache, CallGraphUpdater &CGUpdater, DenseSet *Whitelist = nullptr) - : Allocator(InfoCache.Allocator), Functions(Functions), + : Allocator(InfoCache->Allocator), Functions(Functions), InfoCache(InfoCache), CGUpdater(CGUpdater), Whitelist(Whitelist) {} ~Attributor(); @@ -872,6 +872,61 @@ /* ForceUpdate */ true); } + /// FIXME: should this be allowed to be public, or should there be another + // way to create AA, outside of the Attributor, without the + // QueryingAA? + /// The private version of getAAFor that allows to omit a querying abstract + /// attribute. See also the public getAAFor method. + template + const AAType &getOrCreateAAFor(const IRPosition &IRP, + const AbstractAttribute *QueryingAA = nullptr, + bool TrackDependence = false, + DepClassTy DepClass = DepClassTy::OPTIONAL, + bool ForceUpdate = false) { + if (AAType *AAPtr = lookupAAFor(IRP, QueryingAA, TrackDependence)) { + if (ForceUpdate) + updateAA(*AAPtr); + return *AAPtr; + } + + // No matching attribute found, create one. + // Use the static create method. + auto &AA = AAType::createForPosition(IRP, *this); + registerAA(AA); + + // For now we ignore naked and optnone functions. 
+ bool Invalidate = Whitelist && !Whitelist->count(&AAType::ID); + const Function *FnScope = IRP.getAnchorScope(); + if (FnScope) + Invalidate |= FnScope->hasFnAttribute(Attribute::Naked) || + FnScope->hasFnAttribute(Attribute::OptimizeNone); + + // Bootstrap the new attribute with an initial update to propagate + // information, e.g., function -> call site. If it is not on a given + // whitelist we will not perform updates at all. + if (Invalidate) { + AA.getState().indicatePessimisticFixpoint(); + return AA; + } + + AA.initialize(*this); + + // We can initialize (=look at) code outside the current function set but + // not call update because that would again spawn new abstract attributes in + // potentially unconnected code regions (=SCCs). + if (FnScope && !Functions.count(const_cast(FnScope))) { + AA.getState().indicatePessimisticFixpoint(); + return AA; + } + + updateAA(AA); + + if (TrackDependence && AA.getState().isValidState()) + recordDependence(AA, const_cast(*QueryingAA), + DepClass); + return AA; + } + /// Explicitly record a dependence from \p FromAA to \p ToAA, that is if /// \p FromAA changes \p ToAA should be updated as well. /// @@ -910,7 +965,7 @@ } /// Return the internal information cache. - InformationCache &getInfoCache() { return InfoCache; } + InformationCache &getInfoCache() { return *InfoCache; } /// Return true if this is a module pass, false otherwise. bool isModulePass() const { @@ -940,7 +995,7 @@ /// If a function is exactly defined or it has alwaysinline attribute /// and is viable to be inlined, we say it is IPO amendable bool isFunctionIPOAmendable(const Function &F) { - return F.hasExactDefinition() || InfoCache.InlineableFunctions.count(&F); + return F.hasExactDefinition() || InfoCache->InlineableFunctions.count(&F); } /// Mark the internal function \p F as live. @@ -1207,7 +1262,7 @@ AbstractAttribute &QueryingAA); /// Return the data layout associated with the anchor scope. 
- const DataLayout &getDataLayout() const { return InfoCache.DL; } + const DataLayout &getDataLayout() const { return InfoCache->DL; } /// The allocator used to allocate memory, e.g. for `AbstractAttribute`s. BumpPtrAllocator &Allocator; @@ -1233,58 +1288,6 @@ const AbstractAttribute *QueryingAA, bool &AllCallSitesKnown); - /// The private version of getAAFor that allows to omit a querying abstract - /// attribute. See also the public getAAFor method. - template - const AAType &getOrCreateAAFor(const IRPosition &IRP, - const AbstractAttribute *QueryingAA = nullptr, - bool TrackDependence = false, - DepClassTy DepClass = DepClassTy::OPTIONAL, - bool ForceUpdate = false) { - if (AAType *AAPtr = lookupAAFor(IRP, QueryingAA, TrackDependence)) { - if (ForceUpdate) - updateAA(*AAPtr); - return *AAPtr; - } - - // No matching attribute found, create one. - // Use the static create method. - auto &AA = AAType::createForPosition(IRP, *this); - registerAA(AA); - - // For now we ignore naked and optnone functions. - bool Invalidate = Whitelist && !Whitelist->count(&AAType::ID); - const Function *FnScope = IRP.getAnchorScope(); - if (FnScope) - Invalidate |= FnScope->hasFnAttribute(Attribute::Naked) || - FnScope->hasFnAttribute(Attribute::OptimizeNone); - - // Bootstrap the new attribute with an initial update to propagate - // information, e.g., function -> call site. If it is not on a given - // whitelist we will not perform updates at all. - if (Invalidate) { - AA.getState().indicatePessimisticFixpoint(); - return AA; - } - - AA.initialize(*this); - - // We can initialize (=look at) code outside the current function set but - // not call update because that would again spawn new abstract attributes in - // potentially unconnected code regions (=SCCs). 
- if (FnScope && !Functions.count(const_cast(FnScope))) { - AA.getState().indicatePessimisticFixpoint(); - return AA; - } - - updateAA(AA); - - if (TrackDependence && AA.getState().isValidState()) - recordDependence(AA, const_cast(*QueryingAA), - DepClass); - return AA; - } - /// Return the attribute of \p AAType for \p IRP if existing. template AAType *lookupAAFor(const IRPosition &IRP, @@ -1340,7 +1343,7 @@ SetVector &Functions; /// The information cache that holds pre-processed (LLVM-IR) information. - InformationCache &InfoCache; + InformationCache *InfoCache; /// Helper to update an underlying call graph. CallGraphUpdater &CGUpdater; @@ -2955,6 +2958,26 @@ static const char ID; }; +/// An abstract interface for tracking OpenMp Internal Control Variables (ICVs). +// struct AAICVTracker : public StateWrapper { +// using Base = StateWrapper; +// AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {} + +// /// Returns true if value is assumed to be tracked. +// bool isAssumedTracked() const { return getAssumed(); } + +// /// Returns true if value is known to be tracked. +// bool isKnownTracked() const { return getKnown(); } + +// /// Create an abstract attribute view for the position \p IRP. +// static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A); + +// // Return the value with which /p I can be replaced. +// virtual Value *getReplacementValue(const Instruction *I) = 0; + +// static const char ID; +// }; + /// Run options, used by the pass manager. 
enum AttributorRunOption { NONE = 0, diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -860,7 +860,7 @@ getAAFor(QueryingAA, QueryIRP, /* TrackDependence */ false); auto &OpcodeInstMap = - InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction); + InfoCache->getOpcodeInstMapForFunction(*AssociatedFunction); if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, &QueryingAA, &LivenessAA, Opcodes, CheckBBLivenessOnly)) return false; @@ -882,7 +882,7 @@ getAAFor(QueryingAA, QueryIRP, /* TrackDependence */ false); for (Instruction *I : - InfoCache.getReadOrWriteInstsForFunction(*AssociatedFunction)) { + InfoCache->getReadOrWriteInstsForFunction(*AssociatedFunction)) { // Skip dead instructions. if (isAssumedDead(IRPosition::value(*I), &QueryingAA, &LivenessAA)) continue; @@ -1386,7 +1386,7 @@ // Forbid must-tail calls for now. // TODO: - auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*Fn); + auto &OpcodeInstMap = InfoCache->getOpcodeInstMapForFunction(*Fn); if (!checkForAllInstructionsImpl(nullptr, OpcodeInstMap, InstPred, nullptr, nullptr, {Instruction::Call})) { LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite due to instructions\n"); @@ -1739,7 +1739,7 @@ // In non-module runs we need to look at the call sites of a function to // determine if it is part of a must-tail call edge. This will influence what // attributes we can derive. 
- InformationCache::FunctionInfo &FI = InfoCache.getFunctionInfo(F); + InformationCache::FunctionInfo &FI = InfoCache->getFunctionInfo(F); if (!isModulePass() && !FI.CalledViaMustTail) { for (const Use &U : F.uses()) if (const auto *CB = dyn_cast(U.getUser())) @@ -1921,7 +1921,7 @@ return true; }; - auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F); + auto &OpcodeInstMap = InfoCache->getOpcodeInstMapForFunction(F); bool Success; Success = checkForAllInstructionsImpl( nullptr, OpcodeInstMap, CallSitePred, nullptr, nullptr, @@ -2022,7 +2022,7 @@ // Create an Attributor and initially empty information cache that is filled // while we identify default attribute opportunities. - Attributor A(Functions, InfoCache, CGUpdater); + Attributor A(Functions, &InfoCache, CGUpdater); // Create shallow wrappers for all functions that are not IPO amendable if (AllowShallowWrappers) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -24,6 +24,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/Attributor.h" #include "llvm/Transforms/Utils/CallGraphUpdater.h" using namespace llvm; @@ -49,21 +50,52 @@ #endif namespace { -struct OpenMPOpt { - using OptimizationRemarkGetter = - function_ref; - - OpenMPOpt(SmallVectorImpl &SCC, - SmallPtrSetImpl &ModuleSlice, - CallGraphUpdater &CGUpdater, OptimizationRemarkGetter OREGetter) - : M(*(*SCC.begin())->getParent()), SCC(SCC), ModuleSlice(ModuleSlice), - OMPBuilder(M), CGUpdater(CGUpdater), OREGetter(OREGetter) { +// FIXME: Put in Attributor.h or keep this? +struct AAICVTracker; + +/// OpenMP specific information. For now, stores RFIs and ICVs also needed for +/// Attributor runs. +/// FIXME: Maybe put this in a header file? 
+struct OMPInformationCache : public InformationCache { + OMPInformationCache(Module &M, AnalysisGetter &AG, + BumpPtrAllocator &Allocator, SetVector *CGSCC, + SmallPtrSetImpl &ModuleSlice) + : InformationCache(M, AG, Allocator, CGSCC), ModuleSlice(ModuleSlice), + OMPBuilder(M) { initializeTypes(M); initializeRuntimeFunctions(); + initializeInternalControlVars(); + OMPBuilder.initialize(); } + /// Generic information that describes an internal control variable. + struct InternalControlVarInfo { + /// The kind, as described by InternalControlVar enum. + InternalControlVar Kind; + + /// Environment variable associated with this ICV. + StringRef EnvVarName; + + /// Setter RTL function associated with this ICV. + RuntimeFunction Setter; + + /// Getter RTL function associated with this ICV. + RuntimeFunction Getter; + + /// RTL Function corresponding to the override clause of this ICV + RuntimeFunction Clause; + + /// TODO: define these. + /// Type of the ICV. + // Type *ValueType; + // + /// Initial Value + + /// Scope. + }; + /// Generic information that describes a runtime function struct RuntimeFunctionInfo { @@ -90,9 +122,9 @@ /// Return the vector of uses in function \p F. UseVector &getOrCreateUseVector(Function *F) { - std::unique_ptr &UV = UsesMap[F]; + std::shared_ptr &UV = UsesMap[F]; if (!UV) - UV = std::make_unique(); + UV = std::make_shared(); return *UV; } @@ -112,37 +144,175 @@ /// functions). size_t getNumArgs() const { return ArgumentTypes.size(); } - /// Run the callback \p CB on each use and forget the use if the result is - /// true. The callback will be fed the function in which the use was - /// encountered as second argument. 
void foreachUse(function_ref CB) { + for (auto &It : UsesMap) + foreachUse(CB, It.first, It.second.get()); + } + + void foreachUse(function_ref CB, Function *F, + UseVector *Uses = nullptr) { SmallVector ToBeDeleted; - for (auto &It : UsesMap) { - ToBeDeleted.clear(); - unsigned Idx = 0; - UseVector &UV = *It.second; - for (Use *U : UV) { - if (CB(*U, *It.first)) - ToBeDeleted.push_back(Idx); - ++Idx; - } + ToBeDeleted.clear(); + unsigned Idx = 0; + UseVector &UV = Uses ? *Uses : getOrCreateUseVector(F); + + for (Use *U : UV) { + if (CB(*U, *F)) + ToBeDeleted.push_back(Idx); + ++Idx; + } - // Remove the to-be-deleted indices in reverse order as prior - // modifcations will not modify the smaller indices. - while (!ToBeDeleted.empty()) { - unsigned Idx = ToBeDeleted.pop_back_val(); - UV[Idx] = UV.back(); - UV.pop_back(); - } + // Remove the to-be-deleted indices in reverse order as prior + // modifications will not modify the smaller indices. + while (!ToBeDeleted.empty()) { + unsigned Idx = ToBeDeleted.pop_back_val(); + UV[Idx] = UV.back(); + UV.pop_back(); + } } } private: /// Map from functions to all uses of this runtime function contained in /// them. - DenseMap> UsesMap; + DenseMap> UsesMap; }; + /// The slice of the module we are allowed to look at. + SmallPtrSetImpl &ModuleSlice; + + /// An OpenMP-IR-Builder instance + OpenMPIRBuilder OMPBuilder; + + /// Map from runtime function kind to the runtime function description. + EnumeratedArray + RFIs; + + /// Map from ICV kind to the ICV description. + EnumeratedArray + ICVs; + + /// Helper to initialize all internal control variable information for those + /// defined in OMPKinds.def.
+ void initializeInternalControlVars() { +#define ICV_RT_SET(_Name, RTL) \ + { \ + auto &ICV = ICVs[_Name]; \ + ICV.Setter = RTL; \ + } +#define ICV_RT_GET(Name, RTL) \ + { \ + auto &ICV = ICVs[Name]; \ + ICV.Getter = RTL; \ + } +#define ICV_DATA_ENV(Name, _EnvVarName, Init) \ + { \ + auto &ICV = ICVs[Name]; \ + ICV.Kind = Name; \ + ICV.EnvVarName = _EnvVarName; \ + } +#include "llvm/Frontend/OpenMP/OMPKinds.def" + } + + /// Returns true if the function declaration \p F matches the runtime + /// function types, that is, return type \p RTFRetType, and argument types + /// \p RTFArgTypes. + static bool declMatchesRTFTypes(Function *F, Type *RTFRetType, + SmallVector &RTFArgTypes) { + // TODO: We should output information to the user (under debug output + // and via remarks). + + if (!F) + return false; + if (F->getReturnType() != RTFRetType) + return false; + if (F->arg_size() != RTFArgTypes.size()) + return false; + + auto RTFTyIt = RTFArgTypes.begin(); + for (Argument &Arg : F->args()) { + if (Arg.getType() != *RTFTyIt) + return false; + + ++RTFTyIt; + } + + return true; + } + + /// Helper to initialize all runtime function information for those defined + /// in OMPKinds.def. + void initializeRuntimeFunctions() { + // Helper to collect all uses of the declaration in the UsesMap. + auto CollectUses = [&](RuntimeFunctionInfo &RFI) { + unsigned NumUses = 0; + if (!RFI.Declaration) + return NumUses; + OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration); + + NumOpenMPRuntimeFunctionsIdentified += 1; + NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses(); + + // TODO: We directly convert uses into proper calls and unknown uses.
+ for (Use &U : RFI.Declaration->uses()) { + if (Instruction *UserI = dyn_cast(U.getUser())) { + if (ModuleSlice.count(UserI->getFunction())) { + RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U); + ++NumUses; + } + } else { + RFI.getOrCreateUseVector(nullptr).push_back(&U); + ++NumUses; + } + } + return NumUses; + }; + + Module &M = *((*ModuleSlice.begin())->getParent()); + +#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \ + { \ + SmallVector ArgsTypes({__VA_ARGS__}); \ + Function *F = M.getFunction(_Name); \ + if (declMatchesRTFTypes(F, _ReturnType, ArgsTypes)) { \ + auto &RFI = RFIs[_Enum]; \ + RFI.Kind = _Enum; \ + RFI.Name = _Name; \ + RFI.IsVarArg = _IsVarArg; \ + RFI.ReturnType = _ReturnType; \ + RFI.ArgumentTypes = std::move(ArgsTypes); \ + RFI.Declaration = F; \ + unsigned NumUses = CollectUses(RFI); \ + (void)NumUses; \ + LLVM_DEBUG({ \ + dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \ + << " found\n"; \ + if (RFI.Declaration) \ + dbgs() << TAG << "-> got " << NumUses << " uses in " \ + << RFI.getNumFunctionsWithUses() \ + << " different functions.\n"; \ + }); \ + } \ + } +#include "llvm/Frontend/OpenMP/OMPKinds.def" + + // TODO: We should attach the attributes defined in OMPKinds.def. + } +}; + +struct OpenMPOpt { + + using OptimizationRemarkGetter = + function_ref; + + OpenMPOpt(SmallVectorImpl &SCC, CallGraphUpdater &CGUpdater, + OptimizationRemarkGetter OREGetter, + OMPInformationCache *OMPInfoCache) + : M(*(*SCC.begin())->getParent()), SCC(SCC), + ModuleSlice(OMPInfoCache->ModuleSlice), CGUpdater(CGUpdater), + OREGetter(OREGetter), OMPInfoCache(OMPInfoCache) {} + /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. bool run() { bool Changed = false; @@ -151,18 +321,45 @@ << " functions in a slice with " << ModuleSlice.size() << " functions\n"); + Changed |= runAttributor(); + + // Initialize custom attribute set. 
Changed |= deduplicateRuntimeCalls(); Changed |= deleteParallelRegions(); return Changed; } + /// Return the call if \p U is a callee use in a regular call. If \p RFI is + /// given it has to be the callee or a nullptr is returned. + static CallInst *getCallIfRegularCall( + Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { + CallInst *CI = dyn_cast(U.getUser()); + if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() && + (!RFI || CI->getCalledFunction() == RFI->Declaration)) + return CI; + return nullptr; + } + + /// Return the call if \p V is a regular call. If \p RFI is given it has to be + /// the callee or a nullptr is returned. + static CallInst *getCallIfRegularCall( + Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { + CallInst *CI = dyn_cast(&V); + if (CI && !CI->hasOperandBundles() && + (!RFI || CI->getCalledFunction() == RFI->Declaration)) + return CI; + return nullptr; + } + private: /// Try to delete parallel regions if possible. bool deleteParallelRegions() { const unsigned CallbackCalleeOperand = 2; - RuntimeFunctionInfo &RFI = RFIs[OMPRTL___kmpc_fork_call]; + OMPInformationCache::RuntimeFunctionInfo &RFI = + OMPInfoCache->RFIs[OMPRTL___kmpc_fork_call]; + if (!RFI.Declaration) return false; @@ -232,7 +429,8 @@ for (Function *F : SCC) { for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs) - deduplicateRuntimeCalls(*F, RFIs[DeduplicableRuntimeCallID]); + deduplicateRuntimeCalls(*F, + OMPInfoCache->RFIs[DeduplicableRuntimeCallID]); // __kmpc_global_thread_num is special as we can replace it with an // argument in enough cases to make it worth trying. @@ -243,7 +441,7 @@ break; } Changed |= deduplicateRuntimeCalls( - *F, RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg); + *F, OMPInfoCache->RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg); } return Changed; @@ -268,8 +466,9 @@ /// return a local `struct ident_t*`. For now, if we cannot find a suitable /// return value we create one from scratch. 
We also do not yet combine /// information, e.g., the source locations, see combinedIdentStruct. - Value *getCombinedIdentFromCallUsesIn(RuntimeFunctionInfo &RFI, Function &F, - bool GlobalOnly) { + Value * + getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI, + Function &F, bool GlobalOnly) { bool SingleChoice = true; Value *Ident = nullptr; auto CombineIdentStruct = [&](Use &U, Function &Caller) { @@ -285,29 +484,30 @@ if (!Ident || !SingleChoice) { // The IRBuilder uses the insertion block to get to the module, this is // unfortunate but we work around it for now. - if (!OMPBuilder.getInsertionPoint().getBlock()) - OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy( - &F.getEntryBlock(), F.getEntryBlock().begin())); + if (!OMPInfoCache->OMPBuilder.getInsertionPoint().getBlock()) + OMPInfoCache->OMPBuilder.updateToLocation( + OpenMPIRBuilder::InsertPointTy(&F.getEntryBlock(), + F.getEntryBlock().begin())); // Create a fallback location if non was found. // TODO: Use the debug locations of the calls instead. - Constant *Loc = OMPBuilder.getOrCreateDefaultSrcLocStr(); - Ident = OMPBuilder.getOrCreateIdent(Loc); + Constant *Loc = OMPInfoCache->OMPBuilder.getOrCreateDefaultSrcLocStr(); + Ident = OMPInfoCache->OMPBuilder.getOrCreateIdent(Loc); } return Ident; } /// Try to eliminiate calls of \p RFI in \p F by reusing an existing one or /// \p ReplVal if given. - bool deduplicateRuntimeCalls(Function &F, RuntimeFunctionInfo &RFI, + bool deduplicateRuntimeCalls(Function &F, + OMPInformationCache::RuntimeFunctionInfo &RFI, Value *ReplVal = nullptr) { auto *UV = RFI.getUseVector(F); if (!UV || UV->size() + (ReplVal != nullptr) < 2) return false; - LLVM_DEBUG(dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " - << RFI.Name - << (ReplVal ? " with an existing value\n" : "\n") - << "\n"); + LLVM_DEBUG( + dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name + << (ReplVal ? 
" with an existing value\n" : "\n") << "\n"); assert((!ReplVal || (isa(ReplVal) && cast(ReplVal)->getParent() == &F)) && "Unexpected replacement value!"); @@ -399,8 +599,8 @@ if (CallInst *CI = getCallIfRegularCall(U)) { Value *ArgOp = CI->getArgOperand(ArgNo); if (CI == &RefCI || GTIdArgs.count(ArgOp) || - getCallIfRegularCall(*ArgOp, - &RFIs[OMPRTL___kmpc_global_thread_num])) + getCallIfRegularCall( + *ArgOp, &OMPInfoCache->RFIs[OMPRTL___kmpc_global_thread_num])) continue; } return false; @@ -419,8 +619,9 @@ }; // The argument users of __kmpc_global_thread_num calls are GTIds. - RuntimeFunctionInfo &GlobThreadNumRFI = - RFIs[OMPRTL___kmpc_global_thread_num]; + OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI = + OMPInfoCache->RFIs[OMPRTL___kmpc_global_thread_num]; + GlobThreadNumRFI.foreachUse([&](Use &U, Function &F) { if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI)) AddUserArgs(*CI); @@ -434,109 +635,6 @@ AddUserArgs(*GTIdArgs[u]); } - /// Return the call if \p U is a callee use in a regular call. If \p RFI is - /// given it has to be the callee or a nullptr is returned. - CallInst *getCallIfRegularCall(Use &U, RuntimeFunctionInfo *RFI = nullptr) { - CallInst *CI = dyn_cast(U.getUser()); - if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() && - (!RFI || CI->getCalledFunction() == RFI->Declaration)) - return CI; - return nullptr; - } - - /// Return the call if \p V is a regular call. If \p RFI is given it has to be - /// the callee or a nullptr is returned. - CallInst *getCallIfRegularCall(Value &V, RuntimeFunctionInfo *RFI = nullptr) { - CallInst *CI = dyn_cast(&V); - if (CI && !CI->hasOperandBundles() && - (!RFI || CI->getCalledFunction() == RFI->Declaration)) - return CI; - return nullptr; - } - - /// Returns true if the function declaration \p F matches the runtime - /// function types, that is, return type \p RTFRetType, and argument types - /// \p RTFArgTypes. 
- static bool declMatchesRTFTypes(Function *F, Type *RTFRetType, - SmallVector &RTFArgTypes) { - // TODO: We should output information to the user (under debug output - // and via remarks). - - if (!F) - return false; - if (F->getReturnType() != RTFRetType) - return false; - if (F->arg_size() != RTFArgTypes.size()) - return false; - - auto RTFTyIt = RTFArgTypes.begin(); - for (Argument &Arg : F->args()) { - if (Arg.getType() != *RTFTyIt) - return false; - - ++RTFTyIt; - } - - return true; - } - - /// Helper to initialize all runtime function information for those defined in - /// OpenMPKinds.def. - void initializeRuntimeFunctions() { - // Helper to collect all uses of the decleration in the UsesMap. - auto CollectUses = [&](RuntimeFunctionInfo &RFI) { - unsigned NumUses = 0; - if (!RFI.Declaration) - return NumUses; - OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration); - - NumOpenMPRuntimeFunctionsIdentified += 1; - NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses(); - - // TODO: We directly convert uses into proper calls and unknown uses. - for (Use &U : RFI.Declaration->uses()) { - if (Instruction *UserI = dyn_cast(U.getUser())) { - if (ModuleSlice.count(UserI->getFunction())) { - RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U); - ++NumUses; - } - } else { - RFI.getOrCreateUseVector(nullptr).push_back(&U); - ++NumUses; - } - } - return NumUses; - }; - -#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \ - { \ - SmallVector ArgsTypes({__VA_ARGS__}); \ - Function *F = M.getFunction(_Name); \ - if (declMatchesRTFTypes(F, _ReturnType, ArgsTypes)) { \ - auto &RFI = RFIs[_Enum]; \ - RFI.Kind = _Enum; \ - RFI.Name = _Name; \ - RFI.IsVarArg = _IsVarArg; \ - RFI.ReturnType = _ReturnType; \ - RFI.ArgumentTypes = std::move(ArgsTypes); \ - RFI.Declaration = F; \ - unsigned NumUses = CollectUses(RFI); \ - (void)NumUses; \ - LLVM_DEBUG({ \ - dbgs() << TAG << RFI.Name << (RFI.Declaration ? 
"" : " not") \ - << " found\n"; \ - if (RFI.Declaration) \ - dbgs() << TAG << "-> got " << NumUses << " uses in " \ - << RFI.getNumFunctionsWithUses() \ - << " different functions.\n"; \ - }); \ - } \ - } -#include "llvm/Frontend/OpenMP/OMPKinds.def" - - // TODO: We should attach the attributes defined in OMPKinds.def. - } - /// Emit a remark generically /// /// This template function can be used to generically emit a remark. The @@ -555,9 +653,8 @@ Function *F = Inst->getParent()->getParent(); auto &ORE = OREGetter(F); - ORE.emit([&]() { - return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); - }); + ORE.emit( + [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); }); } /// The underyling module. @@ -570,7 +667,7 @@ SmallPtrSetImpl &ModuleSlice; /// An OpenMP-IR-Builder instance - OpenMPIRBuilder OMPBuilder; + // OpenMPIRBuilder OMPBuilder; /// Callback to update the call graph, the first argument is a removed call, /// the second an optional replacement call. @@ -579,13 +676,172 @@ /// Callback to get an OptimizationRemarkEmitter from a Function * OptimizationRemarkGetter OREGetter; - /// Map from runtime function kind to the runtime function description. - EnumeratedArray - RFIs; + /// OpenMP-specific information cache. Also used for Attributor runs. + OMPInformationCache *OMPInfoCache; + + bool runAttributor() { + if (SCC.empty()) + return false; + + // TODO: Expose runAttributorOnFunctions() or have an Attributor instance? + + // TODO: WhiteList some attributes? AAIsDead? 
+ + SetVector Functions(SCC.begin(), SCC.end()); + + // Create an Attributor with openmp-specific infocache + Attributor A(Functions, OMPInfoCache, CGUpdater); + + registerAAs(A, Functions); + + ChangeStatus Changed = A.run(); + + LLVM_DEBUG(dbgs() << "[Attributor] Done with " << Functions.size() + << " functions, result: " << Changed << ".\n"); + + return Changed == ChangeStatus::CHANGED; + } + + void registerAAs(Attributor &A, SetVector &Functions) { + for (Function *F : Functions) { + if (F->isDeclaration()) + continue; + + A.getOrCreateAAFor(IRPosition::function(*F)); + } + } +}; + +struct AAICVTracker : public StateWrapper { + using Base = StateWrapper; + AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {} + + /// Returns true if value is assumed to be tracked. + bool isAssumedTracked() const { return getAssumed(); } + + /// Returns true if value is known to be tracked. + bool isKnownTracked() const { return getKnown(); } + + /// TODO: figure out how to get hold of ICV infos here. + /// Create an abstract attribute view for the position \p IRP. + static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A); + + // virtual Value *getReplacementValue(const Instruction *I) = 0; + virtual Value *getReplacementValue(InternalControlVar ICV, + const Instruction *I) = 0; + + static const char ID; +}; + +struct AAICVTrackerFunction : public AAICVTracker { + AAICVTrackerFunction(const IRPosition &IRP, Attributor &A) + : AAICVTracker(IRP, A) {} + + // FIXME: come up with better string. + const std::string getAsStr() const override { return "ICVTracker"; } + + // FIXME: come up with some stats. + void trackStatistics() const override {} + + // void initialize(Attributor &A) override { + // Instruction *Entry = &(getAnchorScope()->getEntryBlock().front()); + + // /// Implementation defined value. + // ICVValuesMap[Entry] = nullptr; + // } + + /// TODO: decide whether to deduplicate here, or use current + /// deduplicateRuntimeCalls function.
+ ChangeStatus manifest(Attributor &A) override { + return ChangeStatus::UNCHANGED; + } + + // Map of ICV to their values at specific program point. + EnumeratedArray, 4>, + InternalControlVar, InternalControlVar::ICV___last> + ICVValuesMap; + + ChangeStatus updateImpl(Attributor &A) override { + ChangeStatus HasChanged = ChangeStatus::UNCHANGED; + InternalControlVar TrackableICVs[] = { + // Currently only nthreads is being tracked + ICV_nthreads}; + + Function *F = getAnchorScope(); + + auto OMPInfoCache = static_cast(&(A.getInfoCache())); + + for (InternalControlVar ICV : TrackableICVs) { + auto &Setter = OMPInfoCache->RFIs[OMPInfoCache->ICVs[ICV].Setter]; + + auto TrackValues = [&](Use &U, Function &) { + CallInst *CI = OpenMPOpt::getCallIfRegularCall(U); + if (!CI) + return false; + + // FIXME: handle setters with more than 1 argument. + /// Track new value. + if (ICVValuesMap[ICV].insert(std::make_pair(CI, CI->getArgOperand(0)))) + HasChanged = ChangeStatus::CHANGED; + + return false; + }; + + Setter.foreachUse(TrackValues, F); + } + + return HasChanged; + } + + /// Return the value with which \p I can be replaced. + Value *getReplacementValue(InternalControlVar ICV, + const Instruction *I) override { + BasicBlock *CurrBB = const_cast(I->getParent()); + + auto &ValuesSet = ICVValuesMap[ICV]; + + for (auto &pair : ValuesSet) + if (CurrBB == pair.first->getParent()) + if (pair.first->comesBefore(I)) + // both instructions are in the same BB and at \p I we know the ICV + // value. + return pair.second; + + // first predecessor that contains one of the tracked instructions + // contains the tracked value. + for (BasicBlock *Pred : predecessors(CurrBB)) + for (auto &pair : ValuesSet) + if (pair.first->getParent() == Pred) + return pair.second; + + // No value was tracked.
+ return nullptr; + } }; } // namespace +const char AAICVTracker::ID = 0; + +AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP, + Attributor &A) { + AAICVTracker *AA = nullptr; + switch (IRP.getPositionKind()) { + case IRPosition::IRP_INVALID: + case IRPosition::IRP_FLOAT: + case IRPosition::IRP_ARGUMENT: + case IRPosition::IRP_RETURNED: + case IRPosition::IRP_CALL_SITE_RETURNED: + case IRPosition::IRP_CALL_SITE_ARGUMENT: + case IRPosition::IRP_CALL_SITE: + llvm_unreachable("ICVTracker can only be created for function position!"); + case IRPosition::IRP_FUNCTION: + AA = new (A.Allocator) AAICVTrackerFunction(IRP, A); + break; + } + + return *AA; +} + PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR) { @@ -605,16 +861,25 @@ if (SCC.empty()) return PreservedAnalyses::all(); - auto OREGetter = [&C, &CG, &AM](Function *F) -> OptimizationRemarkEmitter & { - FunctionAnalysisManager &FAM = - AM.getResult(C, CG).getManager(); + FunctionAnalysisManager &FAM = + AM.getResult(C, CG).getManager(); + + AnalysisGetter AG(FAM); + + auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { return FAM.getResult(*F); }; CallGraphUpdater CGUpdater; CGUpdater.initialize(CG, C, AM, UR); + + SetVector Functions(SCC.begin(), SCC.end()); + BumpPtrAllocator Allocator; + OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, + /*CGSCC*/ &Functions, ModuleSlice); + // TODO: Compute the module slice we are allowed to look at. 
- OpenMPOpt OMPOpt(SCC, ModuleSlice, CGUpdater, OREGetter); + OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, &InfoCache); bool Changed = OMPOpt.run(); (void)Changed; return PreservedAnalyses::all(); @@ -671,8 +936,14 @@ return *ORE; }; + AnalysisGetter AG; + SetVector Functions(SCC.begin(), SCC.end()); + BumpPtrAllocator Allocator; + OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, + Allocator, + /*CGSCC*/ &Functions, ModuleSlice); // TODO: Compute the module slice we are allowed to look at. - OpenMPOpt OMPOpt(SCC, ModuleSlice, CGUpdater, OREGetter); + OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, &InfoCache); return OMPOpt.run(); }