diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -75,6 +75,7 @@ void initializeAssumptionCacheTrackerPass(PassRegistry&); void initializeAtomicExpandPass(PassRegistry&); void initializeAttributorLegacyPassPass(PassRegistry&); +void initializeAttributorCGSCCLegacyPassPass(PassRegistry &); void initializeBDCELegacyPassPass(PassRegistry&); void initializeBarrierNoopPass(PassRegistry&); void initializeBasicAAWrapperPassPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -192,6 +192,7 @@ (void) llvm::createInstructionNamerPass(); (void) llvm::createMetaRenamerPass(); (void) llvm::createAttributorLegacyPass(); + (void) llvm::createAttributorCGSCCLegacyPass(); (void) llvm::createPostOrderFunctionAttrsLegacyPass(); (void) llvm::createReversePostOrderFunctionAttrsPass(); (void) llvm::createMergeFunctionsPass(); diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -101,13 +101,16 @@ #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Utils/CallGraphUpdater.h" namespace llvm { @@ -537,25 +540,16 @@ struct AnalysisGetter { template typename Analysis::Result *getAnalysis(const Function &F) { - if (!MAM || !F.getParent()) + if (!FAM || 
!F.getParent()) return nullptr; - auto &FAM = MAM->getResult( - const_cast(*F.getParent())) - .getManager(); - return &FAM.getResult(const_cast(F)); + return &FAM->getResult(const_cast(F)); } - template - typename Analysis::Result *getAnalysis(const Module &M) { - if (!MAM) - return nullptr; - return &MAM->getResult(const_cast(M)); - } - AnalysisGetter(ModuleAnalysisManager &MAM) : MAM(&MAM) {} + AnalysisGetter(FunctionAnalysisManager &FAM) : FAM(&FAM) {} AnalysisGetter() {} private: - ModuleAnalysisManager *MAM = nullptr; + FunctionAnalysisManager *FAM = nullptr; }; /// Data structure to hold cached (LLVM-IR) information. @@ -571,20 +565,10 @@ /// reusable, it is advised to inherit from the InformationCache and cast the /// instance down in the abstract attributes. struct InformationCache { - InformationCache(const Module &M, AnalysisGetter &AG) - : DL(M.getDataLayout()), Explorer(/* ExploreInterBlock */ true), AG(AG) { - - CallGraph *CG = AG.getAnalysis(M); - if (!CG) - return; - - DenseMap SccSize; - for (scc_iterator I = scc_begin(CG); !I.isAtEnd(); ++I) { - for (CallGraphNode *Node : *I) - SccSize[Node->getFunction()] = I->size(); - } - SccSizeOpt = std::move(SccSize); - } + InformationCache(const Module &M, AnalysisGetter &AG, + SetVector *CGSCC) + : DL(M.getDataLayout()), Explorer(/* ExploreInterBlock */ true), AG(AG), + CGSCC(CGSCC) {} /// A map type from opcodes to instructions with this opcode. using OpcodeInstMapTy = DenseMap>; @@ -624,11 +608,11 @@ return AG.getAnalysis(F); } - /// Return SCC size on call graph for function \p F. + /// Return SCC size on call graph for function \p F or 0 if unknown. unsigned getSccSize(const Function &F) { - if (!SccSizeOpt.hasValue()) - return 0; - return (SccSizeOpt.getValue())[&F]; + if (CGSCC && CGSCC->count(const_cast(&F))) + return CGSCC->size(); + return 0; } /// Return datalayout used in the module. @@ -657,8 +641,8 @@ /// Getters for analysis. 
AnalysisGetter &AG; - /// Cache result for scc size in the call graph - Optional> SccSizeOpt; + /// The underlying CG-SCC, or null if not available. + SetVector *CGSCC; /// Give the Attributor access to the members so /// Attributor::identifyDefaultAbstractAttributes(...) can initialize them. @@ -695,15 +679,18 @@ struct Attributor { /// Constructor /// + /// \param Functions The set of functions we are deriving attributes for. /// \param InfoCache Cache to hold various information accessible for /// the abstract attributes. + /// \param CGUpdater Helper to update an underlying call graph. /// \param DepRecomputeInterval Number of iterations until the dependences /// between abstract attributes are recomputed. /// \param Whitelist If not null, a set limiting the attribute opportunities. - Attributor(InformationCache &InfoCache, unsigned DepRecomputeInterval, + Attributor(SetVector &Functions, InformationCache &InfoCache, + CallGraphUpdater &CGUpdater, unsigned DepRecomputeInterval, DenseSet *Whitelist = nullptr) - : InfoCache(InfoCache), DepRecomputeInterval(DepRecomputeInterval), - Whitelist(Whitelist) {} + : Functions(Functions), InfoCache(InfoCache), CGUpdater(CGUpdater), + DepRecomputeInterval(DepRecomputeInterval), Whitelist(Whitelist) {} ~Attributor() { DeleteContainerPointers(AllAbstractAttributes); @@ -717,7 +704,7 @@ /// as the Attributor is not destroyed (it owns the attributes now). /// /// \Returns CHANGED if the IR was changed, otherwise UNCHANGED. - ChangeStatus run(Module &M); + ChangeStatus run(); /// Lookup an abstract attribute of type \p AAType at position \p IRP. While /// no abstract attribute is found equivalent positions are checked, see @@ -1034,6 +1021,17 @@ /// Return the data layout associated with the anchor scope. const DataLayout &getDataLayout() const { return InfoCache.DL; } + /// Replace all uses of \p Old with \p New and, for calls (and invokes), + /// update the call graph. 
+ void replaceAllUsesWith(Value &Old, Value &New) { + if (CallBase *OldCB = dyn_cast(&Old)) { + // We do not modify the call graph here but simply reanalyze the old + // function. This should be revisited once the old PM is gone. + CGModifiedFunctions.insert(OldCB->getFunction()); + } + Old.replaceAllUsesWith(&New); + } + private: /// Check \p Pred on all call sites of \p Fn. /// @@ -1062,9 +1060,10 @@ // For now we ignore naked and optnone functions. bool Invalidate = Whitelist && !Whitelist->count(&AAType::ID); - if (const Function *Fn = IRP.getAnchorScope()) - Invalidate |= Fn->hasFnAttribute(Attribute::Naked) || - Fn->hasFnAttribute(Attribute::OptimizeNone); + const Function *FnScope = IRP.getAnchorScope(); + if (FnScope) + Invalidate |= FnScope->hasFnAttribute(Attribute::Naked) || + FnScope->hasFnAttribute(Attribute::OptimizeNone); // Bootstrap the new attribute with an initial update to propagate // information, e.g., function -> call site. If it is not on a given @@ -1075,6 +1074,15 @@ } AA.initialize(*this); + + // We can initialize (=look at) code outside the current function set but + // not call update because that would again spawn new abstract attributes in + // potentially unconnected code regions (=SCCs). + if (FnScope && !Functions.count(const_cast(FnScope))) { + AA.getState().indicatePessimisticFixpoint(); + return AA; + } + AA.update(*this); if (TrackDependence && AA.getState().isValidState()) @@ -1112,7 +1120,8 @@ /// Apply all requested function signature rewrites /// (\see registerFunctionSignatureRewrite) and return Changed if the module /// was altered. - ChangeStatus rewriteFunctionSignatures(); + ChangeStatus + rewriteFunctionSignatures(SmallPtrSetImpl &ModifiedFns); /// The set of all abstract attributes. ///{ @@ -1149,9 +1158,19 @@ DenseMap> ArgumentReplacementMap; + /// The set of functions we are deriving attributes for. + SetVector &Functions; + /// The information cache that holds pre-processed (LLVM-IR) information. 
InformationCache &InfoCache; + /// Helper to update an underlying call graph. + CallGraphUpdater &CGUpdater; + + /// Set of functions for which we modified the content such that it might + /// impact the call graph. + SmallPtrSet CGModifiedFunctions; + /// Set if the attribute currently updated did query a non-fix attribute. bool QueriedNonFixAA; @@ -1829,8 +1848,13 @@ struct AttributorPass : public PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; +struct AttributorCGSCCPass : public PassInfoMixin { + PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, + LazyCallGraph &CG, CGSCCUpdateResult &UR); +}; Pass *createAttributorLegacyPass(); +Pass *createAttributorCGSCCLegacyPass(); /// ---------------------------------------------------------------------------- /// Abstract Attribute Classes diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -138,6 +138,7 @@ initializeJumpThreadingPass(R); initializeSROALegacyPassPass(R); initializeAttributorLegacyPassPass(R); + initializeAttributorCGSCCLegacyPassPass(R); initializePostOrderFunctionAttrsLegacyPassPass(R); initializeReversePostOrderFunctionAttrsLegacyPassPass(R); initializeGlobalsAAWrapperPassPass(R); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -746,6 +746,7 @@ MPM.addPass(PGOIndirectCallPromotion(Phase == ThinLTOPhase::PostLink, true /* SamplePGO */)); } + MPM.addPass(AttributorPass()); // Interprocedural constant propagation now that basic cleanup has occurred // and prior to optimizing globals. @@ -828,6 +829,8 @@ IP.HotCallSiteThreshold = 0; MainCGPipeline.addPass(InlinerPass(IP)); + MainCGPipeline.addPass(AttributorCGSCCPass()); + // Now deduce any function attributes based in the current code. 
MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -108,6 +108,7 @@ CGSCC_PASS("argpromotion", ArgumentPromotionPass()) CGSCC_PASS("invalidate", InvalidateAllAnalysesPass()) CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass()) +CGSCC_PASS("attributor-cgscc", AttributorCGSCCPass()) CGSCC_PASS("inline", InlinerPass()) CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass()) #undef CGSCC_PASS diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -13,13 +13,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO/Attributor.h" +#include "llvm/Transforms/IPO/Attributor.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/GlobalsModRef.h" @@ -245,20 +247,6 @@ return nullptr; } -/// For calls (and invokes) we will only replace instruction uses to not disturb -/// the old style call graph. -/// TODO: Remove this once we get rid of the old PM. 
-static void replaceAllInstructionUsesWith(Value &Old, Value &New) { - if (!isa(Old)) - return Old.replaceAllUsesWith(&New); - SmallVector Uses; - for (Use &U : Old.uses()) - if (isa(U.getUser())) - Uses.push_back(&U); - for (Use *U : Uses) - U->set(&New); -} - static Optional getAssumedConstant(Attributor &A, const Value &V, const AbstractAttribute &AA, bool &UsedAssumedInformation) { @@ -1199,10 +1187,10 @@ "Number of function with unique return"); // Callback to replace the uses of CB with the constant C. - auto ReplaceCallSiteUsersWith = [](CallBase &CB, Constant &C) { + auto ReplaceCallSiteUsersWith = [&A](CallBase &CB, Constant &C) { if (CB.getNumUses() == 0 || CB.isMustTailCall()) return ChangeStatus::UNCHANGED; - replaceAllInstructionUsesWith(CB, C); + A.replaceAllUsesWith(CB, C); return ChangeStatus::CHANGED; }; @@ -4657,7 +4645,7 @@ AI = new BitCastInst(AI, MallocCall->getType(), "malloc_bc", AI->getNextNode()); - replaceAllInstructionUsesWith(*MallocCall, *AI); + A.replaceAllUsesWith(*MallocCall, *AI); if (auto *II = dyn_cast(MallocCall)) { auto *NBB = II->getNormalDest(); @@ -6339,7 +6327,7 @@ bool Attributor::isAssumedDead(const AbstractAttribute &AA, const AAIsDead *LivenessAA) { const Instruction *CtxI = AA.getIRPosition().getCtxI(); - if (!CtxI) + if (!CtxI || !Functions.count(const_cast(CtxI->getFunction()))) return false; // TODO: Find a good way to utilize fine and coarse grained liveness @@ -6618,7 +6606,7 @@ return true; } -ChangeStatus Attributor::run(Module &M) { +ChangeStatus Attributor::run() { LLVM_DEBUG(dbgs() << "[Attributor] Identified and initialized " << AllAbstractAttributes.size() << " abstract attributes.\n"); @@ -6805,9 +6793,9 @@ NumAttributesValidFixpoint += NumAtFixpoint; (void)NumFinalAAs; - assert( - NumFinalAAs == AllAbstractAttributes.size() && - "Expected the final number of abstract attributes to remain unchanged!"); + assert(NumFinalAAs == AllAbstractAttributes.size() && + "Expected the final number of abstract 
attributes to remain " + "unchanged!"); // Delete stuff at the end to avoid invalid references and a nice order. { @@ -6827,11 +6815,12 @@ LLVM_DEBUG(dbgs() << "Use " << *NewV << " in " << *U->getUser() << " instead of " << *OldV << "\n"); U->set(NewV); - if (Instruction *I = dyn_cast(OldV)) + if (Instruction *I = dyn_cast(OldV)) { + CGModifiedFunctions.insert(I->getFunction()); if (!isa(I) && !ToBeDeletedInsts.count(I) && - isInstructionTriviallyDead(I)) { + isInstructionTriviallyDead(I)) DeadInsts.push_back(I); - } + } if (isa(NewV) && isa(U->getUser())) { Instruction *UserI = cast(U->getUser()); if (isa(NewV)) { @@ -6868,13 +6857,18 @@ } } for (auto &V : ToBeChangedToUnreachableInsts) - if (Instruction *I = dyn_cast_or_null(V)) + if (Instruction *I = dyn_cast_or_null(V)) { + CGModifiedFunctions.insert(I->getFunction()); changeToUnreachable(I, /* UseLLVMTrap */ false); - for (Instruction *I : TerminatorsToFold) + } + for (Instruction *I : TerminatorsToFold) { + CGModifiedFunctions.insert(I->getFunction()); ConstantFoldTerminator(I->getParent()); + } for (auto &V : ToBeDeletedInsts) { if (Instruction *I = dyn_cast_or_null(V)) { + CGModifiedFunctions.insert(I->getFunction()); I->replaceAllUsesWith(UndefValue::get(I->getType())); if (!isa(I) && isInstructionTriviallyDead(I)) DeadInsts.push_back(I); @@ -6888,7 +6882,10 @@ if (unsigned NumDeadBlocks = ToBeDeletedBlocks.size()) { SmallVector ToBeDeletedBBs; ToBeDeletedBBs.reserve(NumDeadBlocks); - ToBeDeletedBBs.append(ToBeDeletedBlocks.begin(), ToBeDeletedBlocks.end()); + for (BasicBlock *BB : ToBeDeletedBlocks) { + CGModifiedFunctions.insert(BB->getParent()); + ToBeDeletedBBs.push_back(BB); + } // Actually we do not delete the blocks but squash them into a single // unreachable but untangling branches that jump here is something we need // to do in a more generic way. @@ -6902,9 +6899,9 @@ // as live to lower the number of iterations. 
If they happen to be dead, the // below fixpoint loop will identify and eliminate them. SmallVector InternalFns; - for (Function &F : M) - if (F.hasLocalLinkage()) - InternalFns.push_back(&F); + for (Function *F : Functions) + if (F->hasLocalLinkage()) + InternalFns.push_back(F); bool FoundDeadFn = true; while (FoundDeadFn) { @@ -6929,17 +6926,18 @@ } } + // Rewrite the functions as requested during manifest. + ManifestChange = + ManifestChange | rewriteFunctionSignatures(CGModifiedFunctions); + + for (Function *Fn : CGModifiedFunctions) + CGUpdater.reanalyzeFunction(*Fn); + STATS_DECL(AAIsDead, Function, "Number of dead functions deleted."); BUILD_STAT_NAME(AAIsDead, Function) += ToBeDeletedFunctions.size(); - // Rewrite the functions as requested during manifest. - ManifestChange = ManifestChange | rewriteFunctionSignatures(); - - for (Function *Fn : ToBeDeletedFunctions) { - Fn->deleteBody(); - Fn->replaceAllUsesWith(UndefValue::get(Fn->getType())); - Fn->eraseFromParent(); - } + for (Function *Fn : ToBeDeletedFunctions) + CGUpdater.removeFunction(*Fn); if (VerifyMaxFixpointIterations && IterationCounter != MaxFixpointIterations) { @@ -7043,7 +7041,8 @@ return true; } -ChangeStatus Attributor::rewriteFunctionSignatures() { +ChangeStatus Attributor::rewriteFunctionSignatures( + SmallPtrSetImpl &ModifiedFns) { ChangeStatus Changed = ChangeStatus::UNCHANGED; for (auto &It : ArgumentReplacementMap) { @@ -7203,11 +7202,20 @@ for (auto &CallSitePair : CallSitePairs) { CallBase &OldCB = *CallSitePair.first; CallBase &NewCB = *CallSitePair.second; + // We do not modify the call graph here but simply reanalyze the old + // function. This should be revisited once the old PM is gone. + ModifiedFns.insert(OldCB.getFunction()); OldCB.replaceAllUsesWith(&NewCB); OldCB.eraseFromParent(); } - ToBeDeletedFunctions.insert(OldFn); + // Replace the function in the call graph (if any). 
+ CGUpdater.replaceFunctionWith(*OldFn, *NewFn); + + // If the old function was modified and needed to be reanalyzed, the new one + // does now. + if (ModifiedFns.erase(OldFn)) + ModifiedFns.insert(NewFn); Changed = ChangeStatus::CHANGED; } @@ -7537,50 +7545,90 @@ /// Pass (Manager) Boilerplate /// ---------------------------------------------------------------------------- -static bool runAttributorOnModule(Module &M, AnalysisGetter &AG) { - if (DisableAttributor) +static bool runAttributorOnFunctions(InformationCache &InfoCache, + SetVector &Functions, + AnalysisGetter &AG, + CallGraphUpdater &CGUpdater) { + if (DisableAttributor || Functions.empty()) return false; - LLVM_DEBUG(dbgs() << "[Attributor] Run on module with " << M.size() + LLVM_DEBUG(dbgs() << "[Attributor] Run on module with " << Functions.size() << " functions.\n"); // Create an Attributor and initially empty information cache that is filled // while we identify default attribute opportunities. - InformationCache InfoCache(M, AG); - Attributor A(InfoCache, DepRecInterval); + Attributor A(Functions, InfoCache, CGUpdater, DepRecInterval); - for (Function &F : M) - A.initializeInformationCache(F); + for (Function *F : Functions) + A.initializeInformationCache(*F); - for (Function &F : M) { - if (F.hasExactDefinition()) + for (Function *F : Functions) { + if (F->hasExactDefinition()) NumFnWithExactDefinition++; else NumFnWithoutExactDefinition++; // We look at internal functions only on-demand but if any use is not a - // direct call, we have to do it eagerly. - if (F.hasLocalLinkage()) { - if (llvm::all_of(F.uses(), [](const Use &U) { - return ImmutableCallSite(U.getUser()) && - ImmutableCallSite(U.getUser()).isCallee(&U); + // direct call or outside the current set of analyzed functions, we have to + // do it eagerly. 
+ if (F->hasLocalLinkage()) { + if (llvm::all_of(F->uses(), [&Functions](const Use &U) { + ImmutableCallSite ICS(U.getUser()); + return ICS && ICS.isCallee(&U) && + Functions.count(const_cast(ICS.getCaller())); })) continue; } // Populate the Attributor with abstract attribute opportunities in the // function and the information cache with IR information. - A.identifyDefaultAbstractAttributes(F); + A.identifyDefaultAbstractAttributes(*F); } - bool Changed = A.run(M) == ChangeStatus::CHANGED; - assert(!verifyModule(M, &errs()) && "Module verification failed!"); + bool Changed = A.run() == ChangeStatus::CHANGED; + assert(!verifyModule(*Functions.front()->getParent(), &errs()) && + "Module verification failed!"); return Changed; } PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) { - AnalysisGetter AG(AM); - if (runAttributorOnModule(M, AG)) { + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + AnalysisGetter AG(FAM); + + SetVector Functions; + for (Function &F : M) + Functions.insert(&F); + + CallGraphUpdater CGUpdater; + InformationCache InfoCache(M, AG, /* CGSCC */ nullptr); + if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater)) { + // FIXME: Think about passes we will preserve and add them here. 
+ return PreservedAnalyses::none(); + } + return PreservedAnalyses::all(); +} + +PreservedAnalyses AttributorCGSCCPass::run(LazyCallGraph::SCC &C, + CGSCCAnalysisManager &AM, + LazyCallGraph &CG, + CGSCCUpdateResult &UR) { + FunctionAnalysisManager &FAM = + AM.getResult(C, CG).getManager(); + AnalysisGetter AG(FAM); + + SetVector Functions; + for (LazyCallGraph::Node &N : C) + Functions.insert(&N.getFunction()); + + if (Functions.empty()) + return PreservedAnalyses::all(); + + Module &M = *Functions.back()->getParent(); + CallGraphUpdater CGUpdater; + CGUpdater.initialize(CG, C, AM, UR); + InformationCache InfoCache(M, AG, /* CGSCC */ &Functions); + if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater)) { // FIXME: Think about passes we will preserve and add them here. return PreservedAnalyses::none(); } @@ -7601,20 +7649,68 @@ return false; AnalysisGetter AG; - return runAttributorOnModule(M, AG); + SetVector Functions; + for (Function &F : M) + Functions.insert(&F); + + CallGraphUpdater CGUpdater; + InformationCache InfoCache(M, AG, /* CGSCC */ nullptr); + return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + // FIXME: Think about passes we will preserve and add them here. 
+ AU.addRequired(); + } +}; + +struct AttributorCGSCCLegacyPass : public CallGraphSCCPass { + CallGraphUpdater CGUpdater; + static char ID; + + AttributorCGSCCLegacyPass() : CallGraphSCCPass(ID) { + initializeAttributorCGSCCLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnSCC(CallGraphSCC &SCC) override { + if (skipSCC(SCC)) + return false; + + SetVector Functions; + for (CallGraphNode *CGN : SCC) + if (Function *Fn = CGN->getFunction()) + if (!Fn->isDeclaration()) + Functions.insert(Fn); + + if (Functions.empty()) + return false; + + AnalysisGetter AG; + CallGraph &CG = const_cast(SCC.getCallGraph()); + CGUpdater.initialize(CG, SCC); + Module &M = *Functions.back()->getParent(); + InformationCache InfoCache(M, AG, /* CGSCC */ &Functions); + return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater); } + bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); } + void getAnalysisUsage(AnalysisUsage &AU) const override { // FIXME: Think about passes we will preserve and add them here. 
AU.addRequired(); + CallGraphSCCPass::getAnalysisUsage(AU); } }; } // end anonymous namespace Pass *llvm::createAttributorLegacyPass() { return new AttributorLegacyPass(); } +Pass *llvm::createAttributorCGSCCLegacyPass() { + return new AttributorCGSCCLegacyPass(); +} char AttributorLegacyPass::ID = 0; +char AttributorCGSCCLegacyPass::ID = 0; const char AAReturnedValues::ID = 0; const char AANoUnwind::ID = 0; @@ -7767,3 +7863,11 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(AttributorLegacyPass, "attributor", "Deduce and propagate attributes", false, false) +INITIALIZE_PASS_BEGIN(AttributorCGSCCLegacyPass, "attributor-cgscc", + "Deduce and propagate attributes (CG-SCC pass)", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_END(AttributorCGSCCLegacyPass, "attributor-cgscc", + "Deduce and propagate attributes (CG-SCC pass)", false, + false) diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -46,6 +46,7 @@ initializeMergeFunctionsLegacyPassPass(Registry); initializePartialInlinerLegacyPassPass(Registry); initializeAttributorLegacyPassPass(Registry); + initializeAttributorCGSCCLegacyPassPass(Registry); initializePostOrderFunctionAttrsLegacyPassPass(Registry); initializeReversePostOrderFunctionAttrsLegacyPassPass(Registry); initializePruneEHPass(Registry); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -551,6 +551,9 @@ // Infer attributes about declarations if possible. MPM.add(createInferFunctionAttrsLegacyPass()); + // Infer attributes on declarations, call sites, arguments, etc. 
+ MPM.add(createAttributorLegacyPass()); + addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); if (OptLevel > 2) @@ -559,9 +562,6 @@ MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createCalledValuePropagationPass()); - // Infer attributes on declarations, call sites, arguments, etc. - MPM.add(createAttributorLegacyPass()); - MPM.add(createGlobalOptimizerPass()); // Optimize out global vars // Promote any localized global vars. MPM.add(createPromoteMemoryToRegisterPass()); @@ -599,6 +599,9 @@ RunInliner = true; } + // Infer attributes on declarations, call sites, arguments, etc. for an SCC. + MPM.add(createAttributorCGSCCLegacyPass()); + MPM.add(createPostOrderFunctionAttrsLegacyPass()); if (OptLevel > 2) MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args @@ -930,6 +933,9 @@ // CSFDO instrumentation and use pass. addPGOInstrPasses(PM, /* IsCS */ true); + // Infer attributes on declarations, call sites, arguments, etc. for an SCC. + PM.add(createAttributorCGSCCLegacyPass()); + // Optimize globals again if we ran the inliner. if (RunInliner) PM.add(createGlobalOptimizerPass()); diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -113,6 +113,7 @@ ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. +; CHECK-O-NEXT: Running pass: AttributorPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass @@ -140,6 +141,7 @@ ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}> ; CHECK-O-NEXT: Starting CGSCC pass manager run. 
; CHECK-O-NEXT: Running pass: InlinerPass +; CHECK-O-NEXT: Running pass: AttributorCGSCCPass ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -79,6 +79,7 @@ ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. +; CHECK-O-NEXT: Running pass: AttributorPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass @@ -105,6 +106,7 @@ ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}> ; CHECK-O-NEXT: Starting CGSCC pass manager run. 
; CHECK-O-NEXT: Running pass: InlinerPass +; CHECK-O-NEXT: Running pass: AttributorCGSCCPass ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -29,11 +29,11 @@ ; CHECK-NEXT: ModulePass Manager ; CHECK-NEXT: Force set function attributes ; CHECK-NEXT: Infer set function attributes +; CHECK-NEXT: Deduce and propagate attributes ; CHECK-NEXT: Interprocedural Sparse Conditional Constant Propagation ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Called Value Propagation -; CHECK-NEXT: Deduce and propagate attributes ; CHECK-NEXT: Global Variable Optimizer ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction @@ -59,6 +59,7 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: Deduce and propagate attributes (CG-SCC pass) ; CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -29,6 +29,7 @@ ; CHECK-NEXT: ModulePass Manager ; CHECK-NEXT: Force set function attributes ; CHECK-NEXT: Infer set function attributes +; CHECK-NEXT: Deduce and propagate attributes ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Call-site splitting @@ -36,7 +37,6 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Called Value Propagation -; CHECK-NEXT: Deduce and propagate attributes ; CHECK-NEXT: Global Variable 
Optimizer ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction @@ -62,6 +62,7 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: Deduce and propagate attributes (CG-SCC pass) ; CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: Promote 'by reference' arguments to scalars ; CHECK-NEXT: FunctionPass Manager diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -29,11 +29,11 @@ ; CHECK-NEXT: ModulePass Manager ; CHECK-NEXT: Force set function attributes ; CHECK-NEXT: Infer set function attributes +; CHECK-NEXT: Deduce and propagate attributes ; CHECK-NEXT: Interprocedural Sparse Conditional Constant Propagation ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Called Value Propagation -; CHECK-NEXT: Deduce and propagate attributes ; CHECK-NEXT: Global Variable Optimizer ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction @@ -59,6 +59,7 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: Deduce and propagate attributes (CG-SCC pass) ; CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction diff --git a/llvm/test/Other/pass-pipelines.ll b/llvm/test/Other/pass-pipelines.ll --- a/llvm/test/Other/pass-pipelines.ll +++ b/llvm/test/Other/pass-pipelines.ll @@ -46,6 +46,7 @@ ; CHECK-O2-NEXT: Call Graph SCC Pass Manager ; CHECK-O2-NEXT: Remove unused exception handling info ; CHECK-O2-NEXT: Function Integration/Inlining +; CHECK-O2-NEXT: Deduce and propagate attributes (CG-SCC pass) ; CHECK-O2-NEXT: Deduce function attributes ; Next up is the main function pass pipeline. 
It shouldn't be split up and ; should contain the main loop pass pipeline as well. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll @@ -1,9 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes ; RUN: opt -S -basicaa -attributor -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=3 < %s | FileCheck %s --check-prefixes=CHECK,OLDPM_MODULE +; RUN: opt -S -basicaa -attributor-cgscc -attributor-disable=false < %s | FileCheck %s --check-prefixes=CHECK,OLDPM_CGSCC ; RUN: opt -S -passes='attributor' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=3 < %s | FileCheck %s --check-prefixes=CHECK,NEWPM_MODULE +; RUN: opt -S -passes='attributor-cgscc' -aa-pipeline='basic-aa' -attributor-disable=false < %s | FileCheck %s --check-prefixes=CHECK,NEWPM_CGSCC ; OLDPM_MODULE-NOT: @dead ; NEWPM_MODULE-NOT: @dead +; OLDPM_CGSCC-NOT: @dead +; NEWPM_CGSCC-NOT: @dead define internal void @dead() { call i32 @test(i32* null, i32* null) diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll --- a/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll @@ -9,7 +9,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CONV:%.*]] = sext i32 undef to i64 ; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 8, [[CONV]] -; CHECK-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 [[DIV]]) #0, !range !0 +; CHECK-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 [[DIV]]) #{{[0-9]+}}, !range !0 ; CHECK-NEXT: ret i64 [[CALL2]] ; entry: diff --git 
a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll --- a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes -; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=CHECK,MODULE +; RUN: opt -S -passes=attributor-cgscc -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=CHECK,CGSCC ; ; #include ; @@ -53,40 +54,60 @@ declare !callback !0 dso_local i32 @pthread_create(i64*, %union.pthread_attr_t*, i8* (i8*)*, i8*) define internal i8* @foo(i8* %arg) { -; CHECK-LABEL: define {{[^@]+}}@foo -; CHECK-SAME: (i8* noalias nofree readnone returned align 536870912 [[ARG:%.*]]) -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i8* null +; MODULE-LABEL: define {{[^@]+}}@foo +; MODULE-SAME: (i8* noalias nofree readnone returned align 536870912 [[ARG:%.*]]) +; MODULE-NEXT: entry: +; MODULE-NEXT: ret i8* null +; +; CGSCC-LABEL: define {{[^@]+}}@foo +; CGSCC-SAME: (i8* noalias nofree readnone returned [[ARG:%.*]]) +; CGSCC-NEXT: entry: +; CGSCC-NEXT: ret i8* null ; entry: ret i8* %arg } define internal i8* @bar(i8* %arg) { -; CHECK-LABEL: define {{[^@]+}}@bar -; CHECK-SAME: (i8* noalias nofree nonnull readnone returned align 8 dereferenceable(8) [[ARG:%.*]]) -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i8* bitcast (i8** @GlobalVPtr to i8*) +; MODULE-LABEL: define {{[^@]+}}@bar +; MODULE-SAME: (i8* noalias nofree nonnull readnone 
returned align 8 dereferenceable(8) [[ARG:%.*]]) +; MODULE-NEXT: entry: +; MODULE-NEXT: ret i8* bitcast (i8** @GlobalVPtr to i8*) +; +; CGSCC-LABEL: define {{[^@]+}}@bar +; CGSCC-SAME: (i8* nofree readnone returned [[ARG:%.*]]) +; CGSCC-NEXT: entry: +; CGSCC-NEXT: ret i8* bitcast (i8** @GlobalVPtr to i8*) ; entry: ret i8* %arg } define internal i8* @baz(i8* %arg) { -; CHECK-LABEL: define {{[^@]+}}@baz -; CHECK-SAME: (i8* noalias nofree nonnull readnone returned align 8 dereferenceable(1) [[ARG:%.*]]) -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i8* [[ARG]] +; MODULE-LABEL: define {{[^@]+}}@baz +; MODULE-SAME: (i8* noalias nofree nonnull readnone returned align 8 dereferenceable(1) [[ARG:%.*]]) +; MODULE-NEXT: entry: +; MODULE-NEXT: ret i8* [[ARG]] +; +; CGSCC-LABEL: define {{[^@]+}}@baz +; CGSCC-SAME: (i8* nofree readnone returned [[ARG:%.*]]) +; CGSCC-NEXT: entry: +; CGSCC-NEXT: ret i8* [[ARG]] ; entry: ret i8* %arg } define internal i8* @buz(i8* %arg) { -; CHECK-LABEL: define {{[^@]+}}@buz -; CHECK-SAME: (i8* noalias nofree nonnull readnone returned align 8 dereferenceable(1) [[ARG:%.*]]) -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i8* [[ARG]] +; MODULE-LABEL: define {{[^@]+}}@buz +; MODULE-SAME: (i8* noalias nofree nonnull readnone returned align 8 dereferenceable(1) [[ARG:%.*]]) +; MODULE-NEXT: entry: +; MODULE-NEXT: ret i8* [[ARG]] +; +; CGSCC-LABEL: define {{[^@]+}}@buz +; CGSCC-SAME: (i8* nofree readnone returned [[ARG:%.*]]) +; CGSCC-NEXT: entry: +; CGSCC-NEXT: ret i8* [[ARG]] ; entry: ret i8* %arg diff --git a/llvm/test/Transforms/Attributor/align.ll b/llvm/test/Transforms/Attributor/align.ll --- a/llvm/test/Transforms/Attributor/align.ll +++ b/llvm/test/Transforms/Attributor/align.ll @@ -1,5 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --turn off -; RUN: opt -attributor -attributor-manifest-internal -attributor-disable=false -attributor-max-iterations-verify 
-attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR +; RUN: opt -attributor -attributor-manifest-internal -attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=ATTRIBUTOR,ATTRIBUTOR_MODULE +; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-disable=false -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=ATTRIBUTOR,ATTRIBUTOR_CGSCC +; RUN: opt -passes=attributor -attributor-manifest-internal -attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=ATTRIBUTOR,ATTRIBUTOR_MODULE +; RUN: opt -passes=attributor-cgscc -attributor-manifest-internal -attributor-disable=false -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=ATTRIBUTOR,ATTRIBUTOR_CGSCC target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -135,9 +138,14 @@ ; TEST 7 ; Better than IR information define align 4 i32* @test7(i32* align 32 %p) #0 { -; ATTRIBUTOR-LABEL: define {{[^@]+}}@test7 -; ATTRIBUTOR-SAME: (i32* nofree readnone returned align 32 "no-capture-maybe-returned" [[P:%.*]]) -; ATTRIBUTOR-NEXT: ret i32* [[P]] +; ATTRIBUTOR_MODULE-LABEL: define {{[^@]+}}@test7 +; ATTRIBUTOR_MODULE-SAME: (i32* nofree readnone returned align 32 "no-capture-maybe-returned" [[P:%.*]]) +; ATTRIBUTOR_MODULE-NEXT: ret i32* [[P]] +; +; ATTRIBUTOR_CGSCC-LABEL: define {{[^@]+}}@test7 +; ATTRIBUTOR_CGSCC-SAME: (i32* nofree readnone returned align 32 "no-capture-maybe-returned" [[P:%.*]]) +; ATTRIBUTOR_CGSCC-NEXT: [[TMP1:%.*]] = tail call i8* @f1(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) @a1) +; ATTRIBUTOR_CGSCC-NEXT: ret i32* [[P]] ; tail call i8* @f1(i8* align 8 dereferenceable(1) @a1) ret i32* %p @@ -146,18 +154,31 @@ ; TEST 7b 
; Function Attrs: nounwind readnone ssp uwtable define internal i8* @f1b(i8* readnone %0) local_unnamed_addr #0 { -; ATTRIBUTOR-LABEL: define {{[^@]+}}@f1b -; ATTRIBUTOR-SAME: (i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr -; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = icmp eq i8* [[TMP0]], null -; ATTRIBUTOR-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP5:%.*]] -; ATTRIBUTOR: 3: -; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = tail call align 8 i8* @f2b(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) @a1) -; ATTRIBUTOR-NEXT: [[L:%.*]] = load i8, i8* [[TMP4]], align 8 -; ATTRIBUTOR-NEXT: store i8 [[L]], i8* @a1, align 8 -; ATTRIBUTOR-NEXT: br label [[TMP5]] -; ATTRIBUTOR: 5: -; ATTRIBUTOR-NEXT: [[TMP6:%.*]] = phi i8* [ [[TMP4]], [[TMP3]] ], [ [[TMP0]], [[TMP1:%.*]] ] -; ATTRIBUTOR-NEXT: ret i8* [[TMP6]] +; ATTRIBUTOR_MODULE-LABEL: define {{[^@]+}}@f1b +; ATTRIBUTOR_MODULE-SAME: (i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr +; ATTRIBUTOR_MODULE-NEXT: [[TMP2:%.*]] = icmp eq i8* [[TMP0]], null +; ATTRIBUTOR_MODULE-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP5:%.*]] +; ATTRIBUTOR_MODULE: 3: +; ATTRIBUTOR_MODULE-NEXT: [[TMP4:%.*]] = tail call align 8 i8* @f2b(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) @a1) +; ATTRIBUTOR_MODULE-NEXT: [[L:%.*]] = load i8, i8* [[TMP4]], align 8 +; ATTRIBUTOR_MODULE-NEXT: store i8 [[L]], i8* @a1, align 8 +; ATTRIBUTOR_MODULE-NEXT: br label [[TMP5]] +; ATTRIBUTOR_MODULE: 5: +; ATTRIBUTOR_MODULE-NEXT: [[TMP6:%.*]] = phi i8* [ [[TMP4]], [[TMP3]] ], [ [[TMP0]], [[TMP1:%.*]] ] +; ATTRIBUTOR_MODULE-NEXT: ret i8* [[TMP6]] +; +; ATTRIBUTOR_CGSCC-LABEL: define {{[^@]+}}@f1b +; ATTRIBUTOR_CGSCC-SAME: (i8* nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr +; ATTRIBUTOR_CGSCC-NEXT: [[TMP2:%.*]] = icmp eq i8* 
[[TMP0]], null +; ATTRIBUTOR_CGSCC-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP5:%.*]] +; ATTRIBUTOR_CGSCC: 3: +; ATTRIBUTOR_CGSCC-NEXT: [[TMP4:%.*]] = tail call align 8 i8* @f2b(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) @a1) +; ATTRIBUTOR_CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[TMP4]], align 8 +; ATTRIBUTOR_CGSCC-NEXT: store i8 [[L]], i8* @a1, align 8 +; ATTRIBUTOR_CGSCC-NEXT: br label [[TMP5]] +; ATTRIBUTOR_CGSCC: 5: +; ATTRIBUTOR_CGSCC-NEXT: [[TMP6:%.*]] = phi i8* [ [[TMP4]], [[TMP3]] ], [ [[TMP0]], [[TMP1:%.*]] ] +; ATTRIBUTOR_CGSCC-NEXT: ret i8* [[TMP6]] ; %2 = icmp eq i8* %0, null br i1 %2, label %3, label %5 @@ -262,7 +283,8 @@ declare void @user_i32_ptr(i32*) readnone nounwind define internal void @test8(i32* %a, i32* %b, i32* %c) { -; ATTRIBUTOR: define internal void @test8(i32* noalias nocapture readnone align 4 %a, i32* noalias nocapture readnone align 4 %b, i32* noalias nocapture readnone %c) +; ATTRIBUTOR_MODULE: define internal void @test8(i32* noalias nocapture readnone align 4 %a, i32* noalias nocapture readnone align 4 %b, i32* noalias nocapture readnone %c) +; ATTRIBUTOR_CGSCC: define internal void @test8(i32* nocapture readnone align 4 %a, i32* nocapture readnone align 4 %b, i32* nocapture readnone %c) call void @user_i32_ptr(i32* %a) call void @user_i32_ptr(i32* %b) call void @user_i32_ptr(i32* %c) diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll --- a/llvm/test/Transforms/Attributor/liveness.ll +++ b/llvm/test/Transforms/Attributor/liveness.ll @@ -1,9 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes -; RUN: opt -attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,OLDPM -; RUN: opt -passes=attributor --attributor-disable=false -attributor-max-iterations-verify 
-attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NEWPM +; RUN: opt -attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,MODULE,ALL_BUT_OLD_CGSCCC +; RUN: opt -attributor-cgscc --attributor-disable=false -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC +; RUN: opt -passes=attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,MODULE,ALL_BUT_OLD_CGSCCC +; RUN: opt -passes='attributor-cgscc' --attributor-disable=false -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC,ALL_BUT_OLD_CGSCCC ; UTC_ARGS: --turn off -; CHECK: @dead_with_blockaddress_users.l = constant [2 x i8*] [i8* inttoptr (i32 1 to i8*), i8* inttoptr (i32 1 to i8*)] +; ALL_BUT_OLD_CGSCCC: @dead_with_blockaddress_users.l = constant [2 x i8*] [i8* inttoptr (i32 1 to i8*), i8* inttoptr (i32 1 to i8*)] @dead_with_blockaddress_users.l = constant [2 x i8*] [i8* blockaddress(@dead_with_blockaddress_users, %lab0), i8* blockaddress(@dead_with_blockaddress_users, %end)] declare void @no_return_call() nofree noreturn nounwind readnone @@ -23,7 +25,7 @@ ; This internal function has no live call sites, so all its BBs are considered dead, ; and nothing should be deduced for it. 
-; CHECK-NOT: define internal i32 @dead_internal_func(i32 %0) +; MODULE-NOT: define internal i32 @dead_internal_func(i32 %0) define internal i32 @dead_internal_func(i32 %0) { %2 = icmp slt i32 %0, 1 br i1 %2, label %3, label %5 @@ -47,7 +49,7 @@ ret i32 %2 } -; CHECK-NOT: internal_load +; MODULE-NOT: internal_load define internal i32 @internal_load(i32*) norecurse nounwind uwtable { %2 = load i32, i32* %0, align 4 ret i32 %2 @@ -55,7 +57,8 @@ ; TEST 1: Only first block is live. ; CHECK: Function Attrs: nofree noreturn nosync nounwind -; CHECK-NEXT: define i32 @first_block_no_return(i32 %a, i32* nocapture nofree nonnull readonly %ptr1, i32* nocapture nofree readnone %ptr2) +; MODULE-NEXT: define i32 @first_block_no_return(i32 %a, i32* nocapture nofree nonnull readonly %ptr1, i32* nocapture nofree readnone %ptr2) +; CGSCC-NEXT: define i32 @first_block_no_return(i32 %a, i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) %ptr1, i32* nocapture nofree readnone %ptr2) define i32 @first_block_no_return(i32 %a, i32* nonnull %ptr1, i32* %ptr2) #0 { entry: call i32 @internal_load(i32* %ptr1) @@ -792,10 +795,11 @@ ; CHECK: define internal void @non_dead_d13() ; CHECK: define internal void @non_dead_d14() ; Verify we actually deduce information for these functions. 
-; OLDPM: Function Attrs: nofree nosync nounwind readnone willreturn -; NEWPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; CHECK-NEXT: define internal void @non_dead_d15() -; CHECK-NOT: define internal void @dead_e +; MODULE: Function Attrs: nofree nosync nounwind readnone willreturn +; MODULE-NEXT: define internal void @non_dead_d15() +; MODULE-NOT: define internal void @dead_e +; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC-NEXT: define internal void @non_dead_d15() declare void @blowup() noreturn define void @live_with_dead_entry() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { @@ -852,14 +856,17 @@ ret void } -; CHECK: define internal void @useless_arg_sink() +; MODULE: define internal void @useless_arg_sink() +; CGSCC: define internal void @useless_arg_sink(i32*{{.*}} %a) define internal void @useless_arg_sink(i32* %a) { ret void } -; CHECK: define internal void @useless_arg_almost_sink() +; MODULE: define internal void @useless_arg_almost_sink() +; CGSCC: define internal void @useless_arg_almost_sink(i32*{{.*}} %a) define internal void @useless_arg_almost_sink(i32* %a) { -; CHECK: call void @useless_arg_sink() +; MODULE: call void @useless_arg_sink() +; CGSCC: call void @useless_arg_sink(i32* noalias nofree readnone %a) call void @useless_arg_sink(i32* %a) ret void } @@ -867,7 +874,8 @@ ; Check we do not annotate the function interface of this weak function. 
; CHECK: define weak_odr void @useless_arg_ext(i32* %a) define weak_odr void @useless_arg_ext(i32* %a) { -; CHECK: call void @useless_arg_almost_sink() +; MODULE: call void @useless_arg_almost_sink() +; CGSCC: call void @useless_arg_almost_sink(i32* noalias nofree readnone %a) call void @useless_arg_almost_sink(i32* %a) ret void } @@ -926,7 +934,7 @@ ; UTC_ARGS: --turn off ; Allow blockaddress users -; CHECK-NOT @dead_with_blockaddress_users +; ALL_BUT_OLD_CGSCCC-NOT: @dead_with_blockaddress_users define internal void @dead_with_blockaddress_users(i32* nocapture %pc) nounwind readonly { entry: br label %indirectgoto diff --git a/llvm/test/Transforms/Attributor/norecurse.ll b/llvm/test/Transforms/Attributor/norecurse.ll --- a/llvm/test/Transforms/Attributor/norecurse.ll +++ b/llvm/test/Transforms/Attributor/norecurse.ll @@ -1,4 +1,5 @@ -; RUN: opt -passes=attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=ATTRIBUTOR +; RUN: opt -attributor-cgscc --attributor-disable=false -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=ATTRIBUTOR +; RUN: opt -passes=attributor-cgscc --attributor-disable=false -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=ATTRIBUTOR,ATTRIBUTOR_NPM ; Copied from Transforms/FunctoinAttrs/norecurse.ll ; ATTRIBUTOR: Function Attrs: nofree norecurse nosync nounwind readnone willreturn @@ -136,8 +137,8 @@ declare void @unknown() ; Call an unknown function in a dead block. 
-; ATTRIBUTOR: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; ATTRIBUTOR: define i32 @call_unknown_in_dead_block() +; ATTRIBUTOR_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; ATTRIBUTOR_NPM: define i32 @call_unknown_in_dead_block() define i32 @call_unknown_in_dead_block() local_unnamed_addr { ret i32 0 Dead: diff --git a/llvm/test/Transforms/Attributor/willreturn.ll b/llvm/test/Transforms/Attributor/willreturn.ll --- a/llvm/test/Transforms/Attributor/willreturn.ll +++ b/llvm/test/Transforms/Attributor/willreturn.ll @@ -1,4 +1,5 @@ -; RUN: opt -passes=attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR +; RUN: opt -passes=attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=ATTRIBUTOR,ATTRIBUTOR_MODULE +; RUN: opt -passes=attributor-cgscc --attributor-disable=false -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=ATTRIBUTOR,ATTRIBUTOR_CGSCC target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -8,7 +9,8 @@ ; TEST 1 (positive case) -; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable willreturn +; ATTRIBUTOR_MODULE: Function Attrs: nofree noinline nosync nounwind readnone uwtable willreturn +; ATTRIBUTOR_CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable willreturn ; ATTRIBUTOR-NEXT: define void @only_return() define void @only_return() #0 { ret void @@ -51,7 +53,8 @@ ; } ; fact_maybe_not(-1) doesn't stop. 
-; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable +; ATTRIBUTOR_MODULE: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; ATTRIBUTOR_CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable ; ATTRIBUTOR-NOT: willreturn ; ATTRIBUTOR-NEXT: define i32 @fact_maybe_not_halt(i32 %0) local_unnamed_addr define i32 @fact_maybe_not_halt(i32 %0) local_unnamed_addr #0 { @@ -85,7 +88,8 @@ ; } ; FIXME: missing willreturn -; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable +; ATTRIBUTOR_MODULE: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; ATTRIBUTOR_CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable ; ATTRIBUTOR-NEXT: define i32 @fact_loop(i32 %0) local_unnamed_addr define i32 @fact_loop(i32 %0) local_unnamed_addr #0 { %2 = icmp slt i32 %0, 1 @@ -223,14 +227,18 @@ ; ATTRIBUTOR-NEXT: declare void @will_return() declare void @will_return() willreturn norecurse -; ATTRIBUTOR: Function Attrs: noinline norecurse nounwind uwtable willreturn +; ATTRIBUTOR_MODULE: Function Attrs: noinline nounwind uwtable willreturn +; ATTRIBUTOR_CGSCC: Function Attrs: noinline norecurse nounwind uwtable willreturn ; ATTRIBUTOR-NEXT: define void @f1() define void @f1() #0 { tail call void @will_return() ret void } -; ATTRIBUTOR: Function Attrs: noinline norecurse nounwind uwtable willreturn +; ATTRIBUTOR_MODULE: Function Attrs: noinline nounwind uwtable +; FIXME: Because we do not derive norecurse in the module run anymore, willreturn is missing as well. +; ATTRIBUTOR_MODULE-NOT: willreturn +; ATTRIBUTOR_CGSCC: Function Attrs: noinline norecurse nounwind uwtable willreturn ; ATTRIBUTOR-NEXT: define void @f2() define void @f2() #0 { tail call void @f1() @@ -241,7 +249,8 @@ ; TEST 9 (negative case) ; call willreturn function in endless loop. 
-; ATTRIBUTOR: Function Attrs: noinline norecurse noreturn nounwind uwtable +; ATTRIBUTOR_MODULE: Function Attrs: noinline noreturn nounwind uwtable +; ATTRIBUTOR_CGSCC: Function Attrs: noinline norecurse noreturn nounwind uwtable ; ATTRIBUTOR-NOT: willreturn ; ATTRIBUTOR-NEXT: define void @call_will_return_but_has_loop() define void @call_will_return_but_has_loop() #0 { @@ -288,7 +297,8 @@ ; } ; FIXME: missing willreturn -; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind readonly uwtable +; ATTRIBUTOR_MODULE: Function Attrs: nofree noinline nosync nounwind readonly uwtable +; ATTRIBUTOR_CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readonly uwtable ; ATTRIBUTOR-NEXT: define i32 @loop_constant_trip_count(i32* nocapture nofree readonly %0) define i32 @loop_constant_trip_count(i32* nocapture readonly %0) #0 { br label %3 @@ -318,7 +328,9 @@ ; } ; return ans; ; } -; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind readonly uwtable +; FNATTR-NEXT: define i32 @loop_trip_count_unbound(i32 %0, i32 %1, i32* nocapture readonly %2, i32 %3) local_unnamed_addr +; ATTRIBUTOR_MODULE: Function Attrs: nofree noinline nosync nounwind readonly uwtable +; ATTRIBUTOR_CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readonly uwtable ; ATTRIBUTOR-NOT: willreturn ; ATTRIBUTOR-NEXT: define i32 @loop_trip_count_unbound(i32 %0, i32 %1, i32* nocapture nofree readonly %2, i32 %3) local_unnamed_addr define i32 @loop_trip_count_unbound(i32 %0, i32 %1, i32* nocapture readonly %2, i32 %3) local_unnamed_addr #0 { @@ -354,7 +366,8 @@ ; FIXME: missing willreturn -; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind readonly uwtable +; ATTRIBUTOR_MODULE: Function Attrs: nofree noinline nosync nounwind readonly uwtable +; ATTRIBUTOR_CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readonly uwtable ; ATTRIBUTOR-NEXT: define i32 @loop_trip_dec(i32 %0, i32* nocapture nofree readonly %1) 
local_unnamed_addr define i32 @loop_trip_dec(i32 %0, i32* nocapture readonly %1) local_unnamed_addr #0 { @@ -383,7 +396,8 @@ ; TEST 14 (positive case) ; multiple return -; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable willreturn +; ATTRIBUTOR_MODULE: Function Attrs: nofree noinline nosync nounwind readnone uwtable willreturn +; ATTRIBUTOR_CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable willreturn ; ATTRIBUTOR-NEXT: define i32 @multiple_return(i32 %a) define i32 @multiple_return(i32 %a) #0 { %b = icmp eq i32 %a, 0 @@ -399,7 +413,8 @@ ; unreachable exit ; 15.1 (positive case) -; ATTRIBUTOR: Function Attrs: noinline norecurse nounwind uwtable willreturn +; ATTRIBUTOR_MODULE: Function Attrs: noinline nounwind uwtable willreturn +; ATTRIBUTOR_CGSCC: Function Attrs: noinline norecurse nounwind uwtable willreturn ; ATTRIBUTOR-NEXT: define void @unreachable_exit_positive1() define void @unreachable_exit_positive1() #0 { tail call void @will_return() @@ -411,7 +426,8 @@ } ; FIXME: missing willreturn -; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable +; ATTRIBUTOR_MODULE: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; ATTRIBUTOR_CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable ; ATTRIBUTOR-NEXT: define i32 @unreachable_exit_positive2(i32 %0) define i32 @unreachable_exit_positive2(i32) local_unnamed_addr #0 { %2 = icmp slt i32 %0, 1 @@ -449,7 +465,8 @@ unreachable } -; ATTRIBUTOR: Function Attrs: nofree noinline norecurse noreturn nosync nounwind readnone uwtable +; ATTRIBUTOR_MODULE: Function Attrs: nofree noinline noreturn nosync nounwind readnone uwtable +; ATTRIBUTOR_CGSCC: Function Attrs: nofree noinline norecurse noreturn nosync nounwind readnone uwtable ; ATTRIBUTOR-NOT: willreturn ; ATTRIBUTOR-NEXT: define void @unreachable_exit_negative2() define void @unreachable_exit_negative2() #0 {