diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -57,6 +57,28 @@ static constexpr auto TAG = "[" DEBUG_TYPE "]"; #endif +/// Apply \p CB to all uses of \p F. If \p LookThroughConstantExprUses is +/// true, constant expression users are not given to \p CB but their uses are +/// traversed transitively. +template +static void foreachUse(Function &F, CBTy CB, + bool LookThroughConstantExprUses = true) { + SmallVector Worklist(make_pointer_range(F.uses())); + + for (unsigned idx = 0; idx < Worklist.size(); ++idx) { + Use &U = *Worklist[idx]; + + // Look through constant expression users (e.g., constant bitcasts) and visit their uses instead. + if (LookThroughConstantExprUses && isa(U.getUser())) { + for (Use &CEU : cast(U.getUser())->uses()) + Worklist.push_back(&CEU); + continue; + } + + CB(U); + } +} + /// Helper struct to store tracked ICV values at specif instructions. struct ICVValue { Instruction *Inst; @@ -102,11 +124,12 @@ /// Attributor runs. struct OMPInformationCache : public InformationCache { OMPInformationCache(Module &M, AnalysisGetter &AG, - BumpPtrAllocator &Allocator, SetVector *CGSCC, - SmallPtrSetImpl &ModuleSlice, + BumpPtrAllocator &Allocator, SetVector &CGSCC, SmallPtrSetImpl &Kernels) - : InformationCache(M, AG, Allocator, CGSCC), ModuleSlice(ModuleSlice), - OMPBuilder(M), Kernels(Kernels) { + : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M), + Kernels(Kernels) { + initializeModuleSlice(CGSCC); + OMPBuilder.initialize(); initializeRuntimeFunctions(); initializeInternalControlVars(); @@ -196,20 +219,20 @@ /// Run the callback \p CB on each use and forget the use if the result is /// true. The callback will be fed the function in which the use was /// encountered as second argument. 
- void foreachUse(function_ref CB) { - for (auto &It : UsesMap) - foreachUse(CB, It.first, It.second.get()); + void foreachUse(SmallVectorImpl &SCC, + function_ref CB) { + for (Function *F : SCC) + foreachUse(CB, F); } /// Run the callback \p CB on each use within the function \p F and forget /// the use if the result is true. - void foreachUse(function_ref CB, Function *F, - UseVector *Uses = nullptr) { + void foreachUse(function_ref CB, Function *F) { SmallVector ToBeDeleted; ToBeDeleted.clear(); unsigned Idx = 0; - UseVector &UV = Uses ? *Uses : getOrCreateUseVector(F); + UseVector &UV = getOrCreateUseVector(F); for (Use *U : UV) { if (CB(*U, *F)) @@ -232,8 +255,45 @@ DenseMap> UsesMap; }; + /// Initialize the ModuleSlice member based on \p SCC. ModuleSlice contains + /// (a subset of) all functions that we can look at during this SCC traversal. + /// This includes functions (transitively) called from the SCC and the + /// (transitive) callers of SCC functions. We can also look at a function if + /// there is a "reference edge", i.e., if the function somehow uses (!=calls) + /// a function in the SCC or a caller of a function in the SCC. + void initializeModuleSlice(SetVector &SCC) { + ModuleSlice.insert(SCC.begin(), SCC.end()); + + SmallPtrSet Seen; + SmallVector Worklist(SCC.begin(), SCC.end()); + while (!Worklist.empty()) { + Function *F = Worklist.pop_back_val(); + ModuleSlice.insert(F); + + for (Instruction &I : instructions(*F)) + if (auto *CB = dyn_cast(&I)) + if (Function *Callee = CB->getCalledFunction()) + if (Seen.insert(Callee).second) + Worklist.push_back(Callee); + } + + Seen.clear(); + Worklist.append(SCC.begin(), SCC.end()); + while (!Worklist.empty()) { + Function *F = Worklist.pop_back_val(); + ModuleSlice.insert(F); + + // Traverse all transitive uses. 
+ foreachUse(*F, [&](Use &U) { + if (auto *UsrI = dyn_cast(U.getUser())) + if (Seen.insert(UsrI->getFunction()).second) + Worklist.push_back(UsrI->getFunction()); + }); + } + } + /// The slice of the module we are allowed to look at. - SmallPtrSetImpl &ModuleSlice; + SmallPtrSet ModuleSlice; /// An OpenMP-IR-Builder instance OpenMPIRBuilder OMPBuilder; @@ -548,7 +608,7 @@ return true; }; - RFI.foreachUse(DeleteCallCB); + RFI.foreachUse(SCC, DeleteCallCB); return Changed; } @@ -633,7 +693,7 @@ /* GlobalOnly */ true, SingleChoice); return false; }; - RFI.foreachUse(CombineIdentStruct); + RFI.foreachUse(SCC, CombineIdentStruct); if (!Ident || !SingleChoice) { // The IRBuilder uses the insertion block to get to the module, this is @@ -733,7 +793,7 @@ Changed = true; return true; }; - RFI.foreachUse(ReplaceAndDeleteCB); + RFI.foreachUse(SCC, ReplaceAndDeleteCB); return Changed; } @@ -776,7 +836,7 @@ OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]; - GlobThreadNumRFI.foreachUse([&](Use &U, Function &F) { + GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) { if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI)) AddUserArgs(*CI); return false; @@ -938,7 +998,7 @@ return true; }; - GetterRFI.foreachUse(ReplaceAndDeleteCB); + GetterRFI.foreachUse(ReplaceAndDeleteCB, getAnchorScope()); return Changed; } @@ -1048,12 +1108,9 @@ if (DisableOpenMPOptimizations) return PreservedAnalyses::all(); - SmallPtrSet ModuleSlice; SmallVector SCC; - for (LazyCallGraph::Node &N : C) { + for (LazyCallGraph::Node &N : C) SCC.push_back(&N.getFunction()); - ModuleSlice.insert(SCC.back()); - } if (SCC.empty()) return PreservedAnalyses::all(); @@ -1073,8 +1130,7 @@ SetVector Functions(SCC.begin(), SCC.end()); BumpPtrAllocator Allocator; OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, - /*CGSCC*/ &Functions, ModuleSlice, - OMPInModule.getKernels()); + /*CGSCC*/ Functions, 
OMPInModule.getKernels()); Attributor A(Functions, InfoCache, CGUpdater); @@ -1112,14 +1168,11 @@ if (DisableOpenMPOptimizations || skipSCC(CGSCC)) return false; - SmallPtrSet ModuleSlice; SmallVector SCC; for (CallGraphNode *CGN : CGSCC) if (Function *Fn = CGN->getFunction()) - if (!Fn->isDeclaration()) { + if (!Fn->isDeclaration()) SCC.push_back(Fn); - ModuleSlice.insert(Fn); - } if (SCC.empty()) return false; @@ -1141,7 +1194,7 @@ BumpPtrAllocator Allocator; OMPInformationCache InfoCache( *(Functions.back()->getParent()), AG, Allocator, - /*CGSCC*/ &Functions, ModuleSlice, OMPInModule.getKernels()); + /*CGSCC*/ Functions, OMPInModule.getKernels()); Attributor A(Functions, InfoCache, CGUpdater);