Index: include/llvm/Analysis/CGSCCPassManager.h =================================================================== --- include/llvm/Analysis/CGSCCPassManager.h +++ include/llvm/Analysis/CGSCCPassManager.h @@ -520,6 +520,21 @@ return CGSCCToFunctionPassAdaptor(std::move(Pass), DebugLogging); } + +/// A helper routine to update the call graph, analysis manager, and update +/// result after removing a single edge from the graph. +std::pair +updateCGAndAnalysisManagerForDeadEdge( + LazyCallGraph &G, LazyCallGraph::RefSCC *RC, LazyCallGraph::SCC *C, + LazyCallGraph::Node &SourceN, LazyCallGraph::Node &TargetN, bool IsCall, + CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, bool DebugLogging); + +/// A helper routine to update the call graph, analysis manager, and update +/// result after potentially removing calls or references from a node. +LazyCallGraph::SCC &updateCGAndAnalysisManagerForDCE( + LazyCallGraph &G, LazyCallGraph::SCC &InitialC, LazyCallGraph::Node &N, + CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, bool DebugLogging); + } #endif Index: include/llvm/Analysis/LazyCallGraph.h =================================================================== --- include/llvm/Analysis/LazyCallGraph.h +++ include/llvm/Analysis/LazyCallGraph.h @@ -924,6 +924,13 @@ /// useful to code doing updates or otherwise wanting to walk the IR in the /// same patterns as when we build the call graph. + /// Recursively visits the defined functions whose address is reachable from + /// every constant in the \p Worklist. + /// + /// Doesn't recurse through any constants already in the \p Visited set, and + /// updates that set with every constant visited. + /// + /// For each defined function, calls \p Callback with that function. template static void visitReferences(SmallVectorImpl &Worklist, SmallPtrSetImpl &Visited, @@ -932,7 +939,8 @@ Constant *C = Worklist.pop_back_val(); if (Function *F = dyn_cast(C)) { - Callback(*F); + if (!F->isDeclaration()) + Callback(*F); continue; } @@ -940,10 +948,10 @@ if (Visited.insert(cast(Op)).second) Worklist.push_back(cast(Op)); } - - ///@} } + ///@} + private: typedef SmallVectorImpl::reverse_iterator node_stack_iterator; typedef iterator_range node_stack_range; Index: include/llvm/Transforms/IPO/Inliner.h =================================================================== --- include/llvm/Transforms/IPO/Inliner.h +++ include/llvm/Transforms/IPO/Inliner.h @@ -1,4 +1,4 @@ -//===- InlinerPass.h - Code common to all inliners --------------*- C++ -*-===// +//===- LegacyInlinerBase.h - Inliner pass and infrastructure ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,19 +6,14 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file defines a simple policy-based bottom-up inliner. This file -// implements all of the boring mechanics of the bottom-up inlining, while the -// subclass determines WHAT to inline, which is the much more interesting -// component. 
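// Illustrative sketch, not part of this patch: one way a caller might use the
// LazyCallGraph::visitReferences helper documented in the hunk above. Seeding
// the worklist from constant instruction operands mirrors the pattern used in
// CGSCCPassManager.cpp later in this patch; the dbgs() callback body is purely
// hypothetical.
SmallVector<Constant *, 16> RefWorklist;
SmallPtrSet<Constant *, 16> RefVisited;
for (BasicBlock &BB : F)
  for (Instruction &I : BB)
    for (Value *Op : I.operand_values())
      if (Constant *C = dyn_cast<Constant>(Op))
        if (RefVisited.insert(C).second)
          RefWorklist.push_back(C);
LazyCallGraph::visitReferences(RefWorklist, RefVisited, [](Function &Referee) {
  // With this change, only functions with definitions reach the callback, so
  // no isDeclaration() check is needed here.
  dbgs() << "references " << Referee.getName() << "\n";
});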
-// -//===----------------------------------------------------------------------===// -#ifndef LLVM_TRANSFORMS_IPO_INLINERPASS_H -#define LLVM_TRANSFORMS_IPO_INLINERPASS_H +#ifndef LLVM_TRANSFORMS_IPO_INLINER_H +#define LLVM_TRANSFORMS_IPO_INLINER_H +#include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" @@ -29,13 +24,13 @@ class InlineCost; class OptimizationRemarkEmitter; class ProfileSummaryInfo; -template class SmallPtrSet; /// This class contains all of the helper code which is used to perform the -/// inlining operations that do not depend on the policy. -struct Inliner : public CallGraphSCCPass { - explicit Inliner(char &ID); - explicit Inliner(char &ID, bool InsertLifetime); +/// inlining operations that do not depend on the policy. It contains the core +/// bottom-up inlining infrastructure that specific inliner passes use. +struct LegacyInlinerBase : public CallGraphSCCPass { + explicit LegacyInlinerBase(char &ID); + explicit LegacyInlinerBase(char &ID, bool InsertLifetime); /// For this class, we declare that we require and preserve the call graph. /// If the derived class implements this method, it should always explicitly @@ -82,6 +77,33 @@ ImportedFunctionsInliningStatistics ImportedFunctionsStats; }; +/// The inliner pass for the new pass manager. +/// +/// This pass wires together the inlining utilities and the inline cost +/// analysis into a CGSCC pass. It considers every call in every function in +/// the SCC and tries to inline if profitable. It can be tuned with a number of +/// parameters to control what cost model is used and what tradeoffs are made +/// when making the decision. +/// +/// It should be noted that the legacy inliners do considerably more than this +/// inliner pass does. They provide logic for manually merging allocas, and +/// doing considerable DCE including the DCE of dead functions. This pass makes +/// every attempt to be simpler. DCE of functions requires complex reasoning +/// about comdat groups, etc. Instead, it is expected that other more focused +/// passes be composed to achieve the same end result. 
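// Illustrative sketch, not part of this patch: how the new-PM inliner might be
// composed with more focused passes to recover what the legacy inliner did
// internally. The specific pipeline below (SROA for post-inline cleanup,
// GlobalDCE for dead-function removal) is an assumption for illustration, and
// exact adaptor signatures may differ between versions.
CGSCCPassManager CGPM;
CGPM.addPass(InlinerPass());
CGPM.addPass(createCGSCCToFunctionPassAdaptor(SROA()));
ModulePassManager MPM;
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
MPM.addPass(GlobalDCEPass()); // Dead functions are cleaned up separately.
// Roughly the pipeline string: -passes='cgscc(inline,function(sroa)),globaldce'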
+class InlinerPass : public PassInfoMixin { +public: + InlinerPass(InlineParams Params = llvm::getInlineParams()) + : Params(std::move(Params)) {} + + PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, + LazyCallGraph &CG, CGSCCUpdateResult &UR); + +private: + InlineParams Params; +}; + + } // End llvm namespace #endif Index: lib/Analysis/CGSCCPassManager.cpp =================================================================== --- lib/Analysis/CGSCCPassManager.cpp +++ lib/Analysis/CGSCCPassManager.cpp @@ -9,6 +9,7 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/InstIterator.h" using namespace llvm; @@ -131,6 +132,94 @@ } } +std::pair +llvm::updateCGAndAnalysisManagerForDeadEdge( + LazyCallGraph &G, LazyCallGraph::RefSCC *RC, LazyCallGraph::SCC *C, + LazyCallGraph::Node &SourceN, LazyCallGraph::Node &TargetN, bool IsCall, + CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, bool DebugLogging) { + typedef LazyCallGraph::SCC SCC; + typedef LazyCallGraph::RefSCC RefSCC; + + SCC &TargetC = *G.lookupSCC(TargetN); + RefSCC &TargetRC = TargetC.getOuterRefSCC(); + + if (&TargetRC != RC) { + RC->removeOutgoingEdge(SourceN, TargetN); + if (DebugLogging) + dbgs() << "Deleting outgoing edge from '" << SourceN << "' to '" + << TargetN << "'\n"; + return {RC, C}; + } + if (DebugLogging) + dbgs() << "Deleting internal " << (IsCall ? "call" : "ref") + << " edge from '" << SourceN << "' to '" << TargetN << "'\n"; + + if (IsCall) + C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(SourceN, TargetN), G, + SourceN, C, AM, UR, DebugLogging); + + auto NewRefSCCs = RC->removeInternalRefEdge(SourceN, TargetN); + if (!NewRefSCCs.empty()) { + // Note that we don't bother to invalidate analyses as ref-edge + // connectivity is not really observable in any way and is intended + // exclusively to be used for ordering of transforms rather than for + // analysis conclusions. + + // The RC worklist is in reverse postorder, so we first enqueue the + // current RefSCC as it will remain the parent of all split RefSCCs, then + // we enqueue the new ones in RPO except for the one which contains the + // source node as that is the "bottom" we will continue processing in the + // bottom-up walk. + UR.RCWorklist.insert(RC); + if (DebugLogging) + dbgs() << "Enqueing the existing RefSCC in the update worklist: " << *RC + << "\n"; + // Update the RC to the "bottom". + assert(G.lookupSCC(SourceN) == C && + "Changed the SCC when splitting RefSCCs!"); + RC = &C->getOuterRefSCC(); + assert(G.lookupRefSCC(SourceN) == RC && "Failed to update current RefSCC!"); + for (RefSCC *NewRC : reverse(NewRefSCCs)) + if (NewRC != RC) { + UR.RCWorklist.insert(NewRC); + if (DebugLogging) + dbgs() << "Enqueing a new RefSCC in the update worklist: " << *NewRC + << "\n"; + } + } + + return {RC, C}; +} + +static void demoteCallToRefEdge(LazyCallGraph &G, LazyCallGraph::RefSCC *&RC, + LazyCallGraph::SCC *&C, LazyCallGraph::Node &N, + LazyCallGraph::Node &TargetN, + CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, + bool DebugLogging) { + typedef LazyCallGraph::SCC SCC; + typedef LazyCallGraph::RefSCC RefSCC; + + SCC &TargetC = *G.lookupSCC(TargetN); + RefSCC &TargetRC = TargetC.getOuterRefSCC(); + + // The easy case is when the target RefSCC is not this RefSCC. This is + // only supported when the target RefSCC is a child of this RefSCC. 
+ if (&TargetRC != RC) { + assert(RC->isAncestorOf(TargetRC) && + "Cannot potentially form RefSCC cycles here!"); + RC->switchOutgoingEdgeToRef(N, TargetN); + if (DebugLogging) + dbgs() << "Switch outgoing call edge to a ref edge from '" << N + << "' to '" << TargetN << "'\n"; + return; + } + + // Otherwise we are switching an internal call edge to a ref edge. This + // may split up some SCCs. + C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G, N, C, + AM, UR, DebugLogging); +} + LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( LazyCallGraph &G, LazyCallGraph::SCC &InitialC, LazyCallGraph::Node &N, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, bool DebugLogging) { @@ -175,28 +264,23 @@ // Now walk all references. for (BasicBlock &BB : F) - for (Instruction &I : BB) { + for (Instruction &I : BB) for (Value *Op : I.operand_values()) if (Constant *C = dyn_cast(Op)) if (Visited.insert(C).second) Worklist.push_back(C); - LazyCallGraph::visitReferences(Worklist, Visited, [&](Function &Referee) { - // Skip declarations. - if (Referee.isDeclaration()) - return; - - const Edge *E = N.lookup(Referee); - // FIXME: Similarly to new calls, we also currently preclude - // introducing new references. See above for details. - assert(E && "No function transformations should introduce *new* ref " - "edges! Any new ref edges would require IPO which " - "function passes aren't allowed to do!"); - RetainedEdges.insert(&Referee); - if (E->isCall()) - DemotedCallTargets.insert(&Referee); - }); - } + LazyCallGraph::visitReferences(Worklist, Visited, [&](Function &Referee) { + const Edge *E = N.lookup(Referee); + // FIXME: Similarly to new calls, we also currently preclude + // introducing new references. See above for details. + assert(E && "No function transformations should introduce *new* ref " + "edges! Any new ref edges would require IPO which " + "function passes aren't allowed to do!"); + RetainedEdges.insert(&Referee); + if (E->isCall()) + DemotedCallTargets.insert(&Referee); + }); // First remove all of the edges that are no longer present in this function. // We have to build a list of dead targets first and then remove them as the @@ -208,52 +292,8 @@ for (auto DeadTarget : DeadTargets) { Node &TargetN = *DeadTarget.getPointer(); bool IsCall = DeadTarget.getInt() == Edge::Call; - SCC &TargetC = *G.lookupSCC(TargetN); - RefSCC &TargetRC = TargetC.getOuterRefSCC(); - - if (&TargetRC != RC) { - RC->removeOutgoingEdge(N, TargetN); - if (DebugLogging) - dbgs() << "Deleting outgoing edge from '" << N << "' to '" << TargetN - << "'\n"; - continue; - } - if (DebugLogging) - dbgs() << "Deleting internal " << (IsCall ? "call" : "ref") - << " edge from '" << N << "' to '" << TargetN << "'\n"; - - if (IsCall) - C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G, N, - C, AM, UR, DebugLogging); - - auto NewRefSCCs = RC->removeInternalRefEdge(N, TargetN); - if (!NewRefSCCs.empty()) { - // Note that we don't bother to invalidate analyses as ref-edge - // connectivity is not really observable in any way and is intended - // exclusively to be used for ordering of transforms rather than for - // analysis conclusions. - - // The RC worklist is in reverse postorder, so we first enqueue the - // current RefSCC as it will remain the parent of all split RefSCCs, then - // we enqueue the new ones in RPO except for the one which contains the - // source node as that is the "bottom" we will continue processing in the - // bottom-up walk. 
- UR.RCWorklist.insert(RC); - if (DebugLogging) - dbgs() << "Enqueuing the existing RefSCC in the update worklist: " - << *RC << "\n"; - // Update the RC to the "bottom". - assert(G.lookupSCC(N) == C && "Changed the SCC when splitting RefSCCs!"); - RC = &C->getOuterRefSCC(); - assert(G.lookupRefSCC(N) == RC && "Failed to update current RefSCC!"); - for (RefSCC *NewRC : reverse(NewRefSCCs)) - if (NewRC != RC) { - UR.RCWorklist.insert(NewRC); - if (DebugLogging) - dbgs() << "Enqueuing a new RefSCC in the update worklist: " - << *NewRC << "\n"; - } - } + std::tie(RC, C) = updateCGAndAnalysisManagerForDeadEdge( + G, RC, C, N, TargetN, IsCall, AM, UR, DebugLogging); } // Next demote all the call edges that are now ref edges. This helps make @@ -261,25 +301,7 @@ // form cycles that this would break. for (Function *RefTarget : DemotedCallTargets) { Node &TargetN = *G.lookup(*RefTarget); - SCC &TargetC = *G.lookupSCC(TargetN); - RefSCC &TargetRC = TargetC.getOuterRefSCC(); - - // The easy case is when the target RefSCC is not this RefSCC. This is - // only supported when the target RefSCC is a child of this RefSCC. - if (&TargetRC != RC) { - assert(RC->isAncestorOf(TargetRC) && - "Cannot potentially form RefSCC cycles here!"); - RC->switchOutgoingEdgeToRef(N, TargetN); - if (DebugLogging) - dbgs() << "Switch outgoing call edge to a ref edge from '" << N - << "' to '" << TargetN << "'\n"; - continue; - } - - // Otherwise we are switching an internal call edge to a ref edge. This - // may split up some SCCs. - C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G, N, C, - AM, UR, DebugLogging); + demoteCallToRefEdge(G, RC, C, N, TargetN, AM, UR, DebugLogging); } // Now promote ref edges into call edges. @@ -359,3 +381,112 @@ return *C; } + +LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForDCE( + LazyCallGraph &G, LazyCallGraph::SCC &InitialC, LazyCallGraph::Node &N, + CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, bool DebugLogging) { + typedef LazyCallGraph::Node Node; + typedef LazyCallGraph::Edge Edge; + typedef LazyCallGraph::SCC SCC; + typedef LazyCallGraph::RefSCC RefSCC; + + if (N.begin() == N.end()) + // Can't remove edges that don't exist! + return InitialC; + + RefSCC &InitialRC = InitialC.getOuterRefSCC(); + SCC *C = &InitialC; + RefSCC *RC = &InitialRC; + Function &F = N.getFunction(); + + // Collect a set of calls and references that we can count down from. + // FIXME: We could make the subsequent mutations faster by remembering the + // position of the edges within the edge list of this node here. + SmallPtrSet DeadCallTargets; + SmallPtrSet DeadRefTargets; + for (Edge &E : N) { + DeadRefTargets.insert(&E.getFunction()); + if (E.isCall()) + DeadCallTargets.insert(&E.getFunction()); + } + + SmallVector Worklist; + SmallPtrSet Visited; + // First walk the function and handle all called functions. We do this first + // because if there is a single call edge, whether there are ref edges is + // irrelevant. + for (Instruction &I : instructions(F)) + if (auto CS = CallSite(&I)) + if (Function *Callee = CS.getCalledFunction()) + if (Visited.insert(Callee).second && !Callee->isDeclaration()) + // Actually do the work here of erasing the callee from the dead + // sets. + if (DeadCallTargets.erase(Callee)) + if (DeadCallTargets.empty()) + // Stop walking calls if we account for all of the edges. + break; + + // Now walk all references. 
We do this incrementally and with a fresh walk of + // instructions to allow us to end both of these walks early if we run out of + // potentially dead edges. + for (Instruction &I : instructions(F)) { + for (Value *Op : I.operand_values()) + if (Constant *C = dyn_cast(Op)) + if (Visited.insert(C).second) + Worklist.push_back(C); + LazyCallGraph::visitReferences( + Worklist, Visited, [&](Function &Referee) { + DeadRefTargets.erase(&Referee); + }); + // If we run out of (potentially) dead ref targets we can stop scanning. + if (DeadRefTargets.empty()) + break; + } + + if (DeadCallTargets.empty() && DeadRefTargets.empty()) + // No changes. + return InitialC; + + // First remove all of the edges that are no longer present in this function. + // We have to build a list of dead targets first and then remove them as the + // data structures will all be invalidated by removing them. We also use this + // to identify demoted edges as we have enough information already. + SmallVector, 4> DeadTargets; + SmallVector DemotedRefTargets; + for (Edge &E : N) { + if (!E.isCall()) { + if (DeadRefTargets.count(&E.getFunction())) + DeadTargets.push_back({E.getNode(), Edge::Ref}); + continue; + } + if (!DeadCallTargets.count(&E.getFunction())) + continue; + if (DeadRefTargets.count(&E.getFunction())) + DeadTargets.push_back({E.getNode(), Edge::Call}); + else + DemotedRefTargets.push_back(E.getNode()); + } + + for (auto DeadTarget : DeadTargets) { + Node &TargetN = *DeadTarget.getPointer(); + bool IsCall = DeadTarget.getInt() == Edge::Call; + std::tie(RC, C) = updateCGAndAnalysisManagerForDeadEdge( + G, RC, C, N, TargetN, IsCall, AM, UR, DebugLogging); + } + + for (Node *TargetN : DemotedRefTargets) + demoteCallToRefEdge(G, RC, C, N, *TargetN, AM, UR, DebugLogging); + + assert(!UR.InvalidatedSCCs.count(C) && "Invalidated the current SCC!"); + assert(!UR.InvalidatedRefSCCs.count(RC) && "Invalidated the current RefSCC!"); + assert(&C->getOuterRefSCC() == RC && "Current SCC not in current RefSCC!"); + + // Record the current RefSCC and SCC for higher layers of the CGSCC pass + // manager now that all the updates have been applied. + if (RC != &InitialRC) + UR.UpdatedRC = RC; + if (C != &InitialC) + UR.UpdatedC = C; + + return *C; +} Index: lib/Analysis/InlineCost.cpp =================================================================== --- lib/Analysis/InlineCost.cpp +++ lib/Analysis/InlineCost.cpp @@ -638,7 +638,7 @@ bool HotCallsite = false; uint64_t TotalWeight; - if (CS.getInstruction()->extractProfTotalWeight(TotalWeight) && + if (PSI && CS.getInstruction()->extractProfTotalWeight(TotalWeight) && PSI->isHotCount(TotalWeight)) { HotCallsite = true; } @@ -647,14 +647,14 @@ // when it would increase the threshold and the caller does not need to // minimize its size. bool InlineHint = Callee.hasFnAttribute(Attribute::InlineHint) || - PSI->isHotFunction(&Callee); + (PSI && PSI->isHotFunction(&Callee)); if (InlineHint && !Caller->optForMinSize()) Threshold = MaxIfValid(Threshold, Params.HintThreshold); if (HotCallsite && !Caller->optForMinSize()) Threshold = MaxIfValid(Threshold, Params.HotCallSiteThreshold); - bool ColdCallee = PSI->isColdFunction(&Callee); + bool ColdCallee = PSI && PSI->isColdFunction(&Callee); // For cold callees, use the ColdThreshold knob if it is available and reduces // the threshold. 
if (ColdCallee) Index: lib/Analysis/LazyCallGraph.cpp =================================================================== --- lib/Analysis/LazyCallGraph.cpp +++ lib/Analysis/LazyCallGraph.cpp @@ -10,6 +10,7 @@ #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" @@ -24,21 +25,11 @@ static void addEdge(SmallVectorImpl &Edges, DenseMap &EdgeIndexMap, Function &F, LazyCallGraph::Edge::Kind EK) { - // Note that we consider *any* function with a definition to be a viable - // edge. Even if the function's definition is subject to replacement by - // some other module (say, a weak definition) there may still be - // optimizations which essentially speculate based on the definition and - // a way to check that the specific definition is in fact the one being - // used. For example, this could be done by moving the weak definition to - // a strong (internal) definition and making the weak definition be an - // alias. Then a test of the address of the weak function against the new - // strong definition's address would be an effective way to determine the - // safety of optimizing a direct call edge. - if (!F.isDeclaration() && - EdgeIndexMap.insert({&F, Edges.size()}).second) { - DEBUG(dbgs() << " Added callable function: " << F.getName() << "\n"); - Edges.emplace_back(LazyCallGraph::Edge(F, EK)); - } + if (!EdgeIndexMap.insert({&F, Edges.size()}).second) + return; + + DEBUG(dbgs() << " Added callable function: " << F.getName() << "\n"); + Edges.emplace_back(LazyCallGraph::Edge(F, EK)); } LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) @@ -55,14 +46,26 @@ // are trivially added, but to accumulate the latter we walk the instructions // and add every operand which is a constant to the worklist to process // afterward. + // + // Note that we consider *any* function with a definition to be a viable + // edge. Even if the function's definition is subject to replacement by + // some other module (say, a weak definition) there may still be + // optimizations which essentially speculate based on the definition and + // a way to check that the specific definition is in fact the one being + // used. For example, this could be done by moving the weak definition to + // a strong (internal) definition and making the weak definition be an + // alias. Then a test of the address of the weak function against the new + // strong definition's address would be an effective way to determine the + // safety of optimizing a direct call edge. 
for (BasicBlock &BB : F) for (Instruction &I : BB) { if (auto CS = CallSite(&I)) if (Function *Callee = CS.getCalledFunction()) - if (Callees.insert(Callee).second) { - Visited.insert(Callee); - addEdge(Edges, EdgeIndexMap, *Callee, LazyCallGraph::Edge::Call); - } + if (!Callee->isDeclaration()) + if (Callees.insert(Callee).second) { + Visited.insert(Callee); + addEdge(Edges, EdgeIndexMap, *Callee, LazyCallGraph::Edge::Call); + } for (Value *Op : I.operand_values()) if (Constant *C = dyn_cast(Op)) Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -70,6 +70,7 @@ #include "llvm/Transforms/IPO/GlobalDCE.h" #include "llvm/Transforms/IPO/GlobalOpt.h" #include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include "llvm/Transforms/IPO/Inliner.h" #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/IPO/PartialInlining.h" Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -85,6 +85,7 @@ #endif CGSCC_PASS("invalidate", InvalidateAllAnalysesPass()) CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass()) +CGSCC_PASS("inline", InlinerPass()) CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass()) #undef CGSCC_PASS Index: lib/Transforms/IPO/AlwaysInliner.cpp =================================================================== --- lib/Transforms/IPO/AlwaysInliner.cpp +++ lib/Transforms/IPO/AlwaysInliner.cpp @@ -26,8 +26,9 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/Inliner.h" using namespace llvm; @@ -62,14 +63,14 @@ /// /// Unlike the \c AlwaysInlinerPass, this uses the more heavyweight \c Inliner /// base class to provide several facilities such as array alloca merging. -class AlwaysInlinerLegacyPass : public Inliner { +class AlwaysInlinerLegacyPass : public LegacyInlinerBase { public: - AlwaysInlinerLegacyPass() : Inliner(ID, /*InsertLifetime*/ true) { + AlwaysInlinerLegacyPass() : LegacyInlinerBase(ID, /*InsertLifetime*/ true) { initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry()); } - AlwaysInlinerLegacyPass(bool InsertLifetime) : Inliner(ID, InsertLifetime) { + AlwaysInlinerLegacyPass(bool InsertLifetime) : LegacyInlinerBase(ID, InsertLifetime) { initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry()); } Index: lib/Transforms/IPO/InlineSimple.cpp =================================================================== --- lib/Transforms/IPO/InlineSimple.cpp +++ lib/Transforms/IPO/InlineSimple.cpp @@ -25,7 +25,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/IPO/InlinerPass.h" +#include "llvm/Transforms/IPO/Inliner.h" using namespace llvm; @@ -38,16 +38,16 @@ /// The common implementation of the inlining logic is shared between this /// inliner pass and the always inliner pass. The two passes use different cost /// analyses to determine when to inline. 
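// Illustrative sketch, not part of this patch: a hypothetical legacy inliner
// built on LegacyInlinerBase. The subclass only supplies the cost policy via
// its getInlineCost override; all of the bottom-up mechanics come from the
// base class. The policy below (inline only explicitly marked callees) and the
// pass name are made up for illustration; registration boilerplate is omitted.
namespace {
struct ExampleInliner : public LegacyInlinerBase {
  static char ID; // Hypothetical pass ID; definition and registration omitted.
  ExampleInliner() : LegacyInlinerBase(ID, /*InsertLifetime*/ true) {}

  InlineCost getInlineCost(CallSite CS) override {
    Function *Callee = CS.getCalledFunction();
    if (Callee && !Callee->isDeclaration() &&
        Callee->hasFnAttribute(Attribute::AlwaysInline))
      return InlineCost::getAlways();
    return InlineCost::getNever();
  }
};
} // end anonymous namespace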
-class SimpleInliner : public Inliner { +class SimpleInliner : public LegacyInlinerBase { InlineParams Params; public: - SimpleInliner() : Inliner(ID), Params(llvm::getInlineParams()) { + SimpleInliner() : LegacyInlinerBase(ID), Params(llvm::getInlineParams()) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } - explicit SimpleInliner(InlineParams Params) : Inliner(ID), Params(Params) { + explicit SimpleInliner(InlineParams Params) : LegacyInlinerBase(ID), Params(Params) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } @@ -101,10 +101,10 @@ bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) { TTIWP = &getAnalysis(); - return Inliner::runOnSCC(SCC); + return LegacyInlinerBase::runOnSCC(SCC); } void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); - Inliner::getAnalysisUsage(AU); + LegacyInlinerBase::getAnalysisUsage(AU); } Index: lib/Transforms/IPO/Inliner.cpp =================================================================== --- lib/Transforms/IPO/Inliner.cpp +++ lib/Transforms/IPO/Inliner.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/IPO/Inliner.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -26,12 +27,12 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -77,15 +78,15 @@ cl::Hidden, cl::desc("Enable inliner stats for imported functions")); } // namespace -Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {} +LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {} -Inliner::Inliner(char &ID, bool InsertLifetime) +LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime) : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {} /// For this class, we declare that we require and preserve the call graph. /// If the derived class implements this method, it should /// always explicitly call the implementation here. -void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { +void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -410,13 +411,13 @@ return false; } -bool Inliner::doInitialization(CallGraph &CG) { +bool LegacyInlinerBase::doInitialization(CallGraph &CG) { if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) ImportedFunctionsStats.setModuleInfo(CG.getModule()); return false; // No changes to CallGraph. } -bool Inliner::runOnSCC(CallGraphSCC &SCC) { +bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) { if (skipSCC(SCC)) return false; return inlineCalls(SCC); @@ -631,7 +632,7 @@ return Changed; } -bool Inliner::inlineCalls(CallGraphSCC &SCC) { +bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis().getCallGraph(); ACT = &getAnalysis(); PSI = getAnalysis().getPSI(CG.getModule()); @@ -656,7 +657,7 @@ /// Remove now-dead linkonce functions at the end of /// processing to avoid breaking the SCC traversal. 
-bool Inliner::doFinalization(CallGraph &CG) { +bool LegacyInlinerBase::doFinalization(CallGraph &CG) { if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) ImportedFunctionsStats.dump(InlinerFunctionImportStats == InlinerFunctionImportStatsOpts::Verbose); @@ -664,7 +665,7 @@ } /// Remove dead functions that are not included in DNR (Do Not Remove) list. -bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { +bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { SmallVector FunctionsToRemove; SmallVector DeadFunctionsInComdats; SmallDenseMap ComdatEntriesAlive; @@ -766,3 +767,147 @@ } return true; } + +PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, + CGSCCAnalysisManager &AM, LazyCallGraph &CG, + CGSCCUpdateResult &UR) { + FunctionAnalysisManager &FAM = + AM.getResult(InitialC, CG).getManager(); + const ModuleAnalysisManager &MAM = + AM.getResult(InitialC, CG).getManager(); + bool Changed = false; + + assert(InitialC.size() > 0 && "Cannot handle an empty SCC!"); + Module &M = *InitialC.begin()->getFunction().getParent(); + auto *PSI = MAM.getCachedResult(M); + + std::function GetAssumptionCache = + [&](Function &F) -> AssumptionCache & { + return FAM.getResult(F); + }; + + auto GetInlineCost = [&](CallSite CS) { + Function &Callee = *CS.getCalledFunction(); + auto &CalleeTTI = FAM.getResult(Callee); + return llvm::getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, PSI); + }; + + InlineFunctionInfo IFI(/*cg=*/nullptr, &GetAssumptionCache); + SmallVector Calls; + + // We need several datastructures to use while updating the call graph during + // inlining. + SmallVector RefWorklist; + SmallPtrSet RefVisited; + + // We use a worklist of nodes to process so that we can handle if the SCC + // structure changes and some nodes are no longer part of the current SCC. We + // also need to use an updatable pointer for the SCC as a consequence. + SmallVector Nodes; + for (auto &N : InitialC) + Nodes.push_back(&N); + auto *C = &InitialC; + auto *RC = &C->getOuterRefSCC(); + + do { + auto &N = *Nodes.pop_back_val(); + if (CG.lookupSCC(N) != C) + continue; + Function &F = N.getFunction(); + if (F.hasFnAttribute(Attribute::OptimizeNone)) + continue; + + // Get the remarks emission analysis for the caller. + auto &ORE = FAM.getResult(F); + + for (Instruction &I : instructions(F)) + if (auto CS = CallSite(&I)) + if (Function *Callee = CS.getCalledFunction()) + if (!Callee->isDeclaration()) + Calls.push_back(CS); + + bool DidInline = false; + while (!Calls.empty()) { + CallSite CS = Calls.pop_back_val(); + Function &Callee = *CS.getCalledFunction(); + + // Check whether we want to inline this callsite. + if (!shouldInline(CS, GetInlineCost, ORE)) + continue; + + // Part of the (somewhat implicit) contract of InlineFunction is that it + // always inserts the new basic blocks at the end of the function. This + // allows us to walk those new instructions by just grabbing the end + // instruction iterator and backing up one. + auto LastBBI = std::prev(F.end()); + + if (!InlineFunction(CS, IFI)) + continue; + DidInline = true; + + // Now walk the basic blocks and instructions that were inlined and add + // any new calls to the worklist. 
+ RefVisited.clear(); + for (BasicBlock &BB : make_range(std::next(LastBBI), F.end())) + for (Instruction &I : BB) { + if (auto CS = CallSite(&I)) + if (Function *NewCallee = CS.getCalledFunction()) + if (!NewCallee->isDeclaration()) { + Calls.push_back(CS); + RefVisited.insert(NewCallee); + + // By definition when inlining, we can just use trivial inserts + // into the graph. This will even skip already present edges. + RC->insertTrivialCallEdge(N, *CG.lookup(*NewCallee)); + } + for (Value *Op : I.operand_values()) + if (Constant *C = dyn_cast(Op)) + if (RefVisited.insert(C).second) + RefWorklist.push_back(C); + } + LazyCallGraph::visitReferences( + RefWorklist, RefVisited, [&](Function &Referee) { + // By definition when inlining, we can just use trivial inserts + // into the graph. This will even skip already present edges. + RC->insertTrivialRefEdge(N, *CG.lookup(Referee)); + }); + + // For local functions, check whether this makes the callee trivially + // dead so we don't miss the chance to flatten single-call trees of + // functions and otherwise observe the reduced number of users of other + // functions by deleting the caller. + if (Callee.hasLocalLinkage()) { + // To do this we also need to nuke any dead constant uses + // (perhaps made dead by this operation on other functions). + Callee.removeDeadConstantUsers(); + if (Callee.use_empty()) { + std::tie(RC, C) = updateCGAndAnalysisManagerForDeadEdge( + CG, RC, C, N, *CG.lookup(Callee), /*IsCall*/ true, AM, UR, + /*DebugLogging*/ true); + CG.removeDeadFunction(Callee); + M.getFunctionList().erase(Callee); + } + } + } + + if (!DidInline) { + Changed = true; + continue; + } + + // At this point, since we have made changes we have at least removed + // a call instruction. This could remove a call edge or ref edge + // completely, or it could remove a call edge to a function for which we + // have some other ref edge, thereby "demoting" it. But moreover, we want + // to support InlineFunction potentially doing some simplification of code + // so we handle any DCE-related changes to the call graph here from first + // principles. This is already much faster than a fully general call graph + // update which we can get away with because we insert all of the new edges + // as the inlining occurs when it is simplest to do so. + C = &updateCGAndAnalysisManagerForDCE(CG, *C, N, AM, UR, + /*DebugLogging*/ true); + RC = &C->getOuterRefSCC(); + } while (!Nodes.empty()); + + return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} Index: test/Transforms/Inline/basictest.ll =================================================================== --- test/Transforms/Inline/basictest.ll +++ test/Transforms/Inline/basictest.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -inline -sroa -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline,function(sroa))' -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" define i32 @test1f(i32 %i) { Index: test/Transforms/Inline/nested-inline.ll =================================================================== --- test/Transforms/Inline/nested-inline.ll +++ test/Transforms/Inline/nested-inline.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s ; Test that bar and bar2 are both inlined throughout and removed. @A = weak global i32 0 ; [#uses=1] @B = weak global i32 0 ; [#uses=1]