diff --git a/llvm/include/llvm/Transforms/IPO/Inliner.h b/llvm/include/llvm/Transforms/IPO/Inliner.h --- a/llvm/include/llvm/Transforms/IPO/Inliner.h +++ b/llvm/include/llvm/Transforms/IPO/Inliner.h @@ -17,6 +17,7 @@ #include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" #include "llvm/IR/PassManager.h" +#include "llvm/Transforms/IPO/ModuleInliner.h" #include namespace llvm { @@ -121,7 +122,7 @@ ModuleInlinerWrapperPass( InlineParams Params = getInlineParams(), bool MandatoryFirst = true, InliningAdvisorMode Mode = InliningAdvisorMode::Default, - unsigned MaxDevirtIterations = 0); + unsigned MaxDevirtIterations = 0, bool UseModuleInliner = false); ModuleInlinerWrapperPass(ModuleInlinerWrapperPass &&Arg) = default; PreservedAnalyses run(Module &, ModuleAnalysisManager &); diff --git a/llvm/include/llvm/Transforms/IPO/ModuleInliner.h b/llvm/include/llvm/Transforms/IPO/ModuleInliner.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/ModuleInliner.h @@ -0,0 +1,50 @@ +//===- ModuleInliner.h - Module level Inliner pass --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_MODULEINLINER_H +#define LLVM_TRANSFORMS_IPO_MODULEINLINER_H + +#include "llvm/Analysis/InlineAdvisor.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/ReplayInlineAdvisor.h" +#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" +#include "llvm/IR/PassManager.h" +#include + +namespace llvm { + +class AssumptionCacheTracker; +class ProfileSummaryInfo; + +/// The module inliner pass for the new pass manager. +/// +/// This pass wires together the inlining utilities and the inline cost +/// analysis into a module pass. Different from SCC inliner, it considers every +/// call in every function in the whole module and tries to inline if +/// profitable. With this module level inliner, it is possible to evaluate more +/// heuristics in the module level such like PrioriInlineOrder. It can be tuned +/// with a number of parameters to control what cost model is used and what +/// tradeoffs are made when making the decision. +class ModuleInlinerPass : public PassInfoMixin { +public: + ModuleInlinerPass(bool OnlyMandatory = false) + : OnlyMandatory(OnlyMandatory) {} + ModuleInlinerPass(ModuleInlinerPass &&Arg) = default; + + PreservedAnalyses run(Module &, ModuleAnalysisManager &); + +private: + InlineAdvisor &getAdvisor(const ModuleAnalysisManager &MAM, + FunctionAnalysisManager &FAM, Module &M); + std::unique_ptr OwnedAdvisor; + const bool OnlyMandatory; +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_INLINER_H \ No newline at end of file diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -109,6 +109,7 @@ #include "llvm/Transforms/IPO/LoopExtractor.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/IPO/MergeFunctions.h" +#include "llvm/Transforms/IPO/ModuleInliner.h" #include "llvm/Transforms/IPO/OpenMPOpt.h" #include "llvm/Transforms/IPO/PartialInlining.h" #include "llvm/Transforms/IPO/SCCP.h" @@ -274,6 +275,10 @@ cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining.")); +static cl::opt EnableModuleInliner("enable-module-inliner", + cl::init(false), cl::Hidden, + cl::desc("Enable module inliner")); + static cl::opt EnableO3NonTrivialUnswitching( "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3")); @@ -978,7 +983,8 @@ IP.EnableDeferral = EnablePGOInlineDeferral; ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst, - UseInlineAdvisor, MaxDevirtIterations); + UseInlineAdvisor, MaxDevirtIterations, + EnableModuleInliner); // Require the GlobalsAA analysis for the module so we can query it within // the CGSCC pipeline. diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -115,6 +115,7 @@ MODULE_PASS("dfsan", DataFlowSanitizerPass()) MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false)) MODULE_PASS("msan-module", MemorySanitizerPass({})) +MODULE_PASS("module-inline", ModuleInlinerPass()) MODULE_PASS("tsan-module", ThreadSanitizerPass()) MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false)) MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass()) diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -29,6 +29,7 @@ LoopExtractor.cpp LowerTypeTests.cpp MergeFunctions.cpp + ModuleInliner.cpp OpenMPOpt.cpp PartialInlining.cpp PassManagerBuilder.cpp diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -1157,7 +1157,8 @@ ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params, bool MandatoryFirst, InliningAdvisorMode Mode, - unsigned MaxDevirtIterations) + unsigned MaxDevirtIterations, + bool UseModuleInliner) : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations), PM(), MPM() { // Run the inliner first. The theory is that we are walking bottom-up and so @@ -1165,6 +1166,13 @@ // into the callers so that our optimizations can reflect that. // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO // because it makes profile annotation in the backend inaccurate. + if (UseModuleInliner) { + if (MandatoryFirst) + MPM.addPass(ModuleInlinerPass(/*OnlyMandatory*/ true)); + MPM.addPass(ModuleInlinerPass()); + return; + } + if (MandatoryFirst) PM.addPass(InlinerPass(/*OnlyMandatory*/ true)); PM.addPass(InlinerPass()); diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp @@ -0,0 +1,525 @@ +//===- ModuleInliner.cpp - Code related to module inliner +//--------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the mechanics required to implement inlining without +// missing any calls and updating the call graph. The decisions of which calls +// are profitable to inline are implemented elsewhere. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/ModuleInliner.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InlineAdvisor.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/CallPromotionUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include +#include +#include +#include +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "module-inline" + +STATISTIC(NumInlined, "Number of functions inlined"); +STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined"); +STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); +STATISTIC(NumMergedAllocas, "Number of allocas merged together"); + +static cl::opt ModuleInlineReplayFile( + "module-inline-replay", cl::init(""), cl::value_desc("filename"), + cl::desc( + "Optimization remarks file containing inline remarks to be replayed " + "by inlining from module inline remarks."), + cl::Hidden); + +static cl::opt InlineEnablePriorityOrder( + "module-inline-enable-priority-order", cl::Hidden, cl::init(false), + cl::desc("Enable the priority inline order for the inliner")); + +/// Return true if the specified inline history ID +/// indicates an inline history that includes the specified function. +static bool inlineHistoryIncludes( + Function *F, int InlineHistoryID, + const SmallVectorImpl> &InlineHistory) { + while (InlineHistoryID != -1) { + assert(unsigned(InlineHistoryID) < InlineHistory.size() && + "Invalid inline history ID"); + if (InlineHistory[InlineHistoryID].first == F) + return true; + InlineHistoryID = InlineHistory[InlineHistoryID].second; + } + return false; +} + +template class InlineOrder { +public: + using reference = T &; + using const_reference = const T &; + + virtual ~InlineOrder() {} + + virtual size_t size() = 0; + + virtual void push(const T &Elt) = 0; + + virtual T pop() = 0; + + virtual const_reference front() = 0; + + virtual void erase_if(function_ref Pred) = 0; + + bool empty() { return !size(); } +}; + +template > +class DefaultInlineOrder : public InlineOrder { + using reference = T &; + using const_reference = const T &; + +public: + size_t size() override { return Calls.size() - FirstIndex; } + + void push(const T &Elt) override { Calls.push_back(Elt); } + + T pop() override { + assert(size() > 0); + return Calls[FirstIndex++]; + } + + const_reference front() override { + assert(size() > 0); + return Calls[FirstIndex]; + } + + void erase_if(function_ref Pred) override { + Calls.erase(std::remove_if(Calls.begin() + FirstIndex, Calls.end(), Pred), + Calls.end()); + } + +private: + Container Calls; + size_t FirstIndex = 0; +}; + +class PriorityInlineOrder : public InlineOrder> { + using T = std::pair; + using reference = T &; + using const_reference = const T &; + + // Return true if S1 is more desirable than S2. + static bool isMoreDesirable(int S1, int S2) { return S1 < S2; } + + static bool cmp(const T &P1, const T &P2) { + return isMoreDesirable(P2.second, P1.second); + } + + int evaluate(CallBase *CB) { + Function *Callee = CB->getCalledFunction(); + return (int)Callee->getInstructionCount(); + } + + // A call site could become less desirable for inlining because of the size + // growth from prior inlining into the callee. This method is used to lazily + // update the desirability of a call site if it's decreasing. It is only + // called on pop() or front(), not every time the desirability changes. When + // the desirability of the front call site decreases, an updated one would be + // pushed right back into the heap. For simplicity, those cases where + // the desirability of a call site increases are ignored here. + void adjust() { + bool Changed = false; + do { + CallBase *CB = Heap.front().first; + const int PreviousGoodness = Heap.front().second; + const int CurrentGoodness = evaluate(CB); + Changed = isMoreDesirable(PreviousGoodness, CurrentGoodness); + if (Changed) { + std::pop_heap(Heap.begin(), Heap.end(), cmp); + Heap.pop_back(); + Heap.push_back({CB, CurrentGoodness}); + std::push_heap(Heap.begin(), Heap.end(), cmp); + } + } while (Changed); + } + +public: + size_t size() override { return Heap.size(); } + + void push(const T &Elt) override { + CallBase *CB = Elt.first; + const int InlineHistoryID = Elt.second; + const int Goodness = evaluate(CB); + + Heap.push_back({CB, Goodness}); + std::push_heap(Heap.begin(), Heap.end(), cmp); + InlineHistoryMap[CB] = InlineHistoryID; + } + + T pop() override { + assert(size() > 0); + adjust(); + + CallBase *CB = Heap.front().first; + T Result = std::make_pair(CB, InlineHistoryMap[CB]); + InlineHistoryMap.erase(CB); + std::pop_heap(Heap.begin(), Heap.end(), cmp); + Heap.pop_back(); + return Result; + } + + const_reference front() override { + assert(size() > 0); + adjust(); + + CallBase *CB = Heap.front().first; + return *InlineHistoryMap.find(CB); + } + + void erase_if(function_ref Pred) override { + Heap.erase(std::remove_if(Heap.begin(), Heap.end(), Pred), Heap.end()); + std::make_heap(Heap.begin(), Heap.end(), cmp); + } + +private: + SmallVector Heap; + DenseMap InlineHistoryMap; +}; + +InlineAdvisor &ModuleInlinerPass::getAdvisor(const ModuleAnalysisManager &MAM, + FunctionAnalysisManager &FAM, + Module &M) { + if (OwnedAdvisor) + return *OwnedAdvisor; + + auto *IAA = MAM.getCachedResult(M); + if (!IAA) { + // It should still be possible to run the inliner as a stand-alone SCC pass, + // for test scenarios. In that case, we default to the + // DefaultInlineAdvisor, which doesn't need to keep state between SCC pass + // runs. It also uses just the default InlineParams. + // In this case, we need to use the provided FAM, which is valid for the + // duration of the inliner pass, and thus the lifetime of the owned advisor. + // The one we would get from the MAM can be invalidated as a result of the + // inliner's activity. + OwnedAdvisor = + std::make_unique(M, FAM, getInlineParams()); + + if (!ModuleInlineReplayFile.empty()) + OwnedAdvisor = std::make_unique( + M, FAM, M.getContext(), std::move(OwnedAdvisor), + ModuleInlineReplayFile, + /*EmitRemarks=*/true); + + return *OwnedAdvisor; + } + assert(IAA->getAdvisor() && + "Expected a present InlineAdvisorAnalysis also have an " + "InlineAdvisor initialized"); + return *IAA->getAdvisor(); +} + +static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) { + LibFunc LF; + + // Either this is a normal library function or a "vectorizable" + // function. Not using the VFDatabase here because this query + // is related only to libraries handled via the TLI. + return TLI.getLibFunc(F, LF) || + TLI.isKnownVectorFunctionInLibrary(F.getName()); +} + +PreservedAnalyses ModuleInlinerPass::run(Module &M, + ModuleAnalysisManager &MAM) { + bool Changed = false; + + ProfileSummaryInfo *PSI = MAM.getCachedResult(M); + + FunctionAnalysisManager &FAM = + MAM.getResult(M).getManager(); + + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; + + InlineAdvisor &Advisor = getAdvisor(MAM, FAM, M); + Advisor.onPassEntry(); + + auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(); }); + + // We use a single common worklist for calls across the entire Module. We + // process these in-order and append new calls introduced during inlining to + // the end. The PriorityInlineOrder is optional here, in which the smaller + // callee would have a higher priority to inline. + // + // Note that this particular order of processing is actually critical to + // avoid very bad behaviors. Consider *highly connected* call graphs where + // each function contains a small amount of code and a couple of calls to + // other functions. Because the LLVM inliner is fundamentally a bottom-up + // inliner, it can handle gracefully the fact that these all appear to be + // reasonable inlining candidates as it will flatten things until they become + // too big to inline, and then move on and flatten another batch. + // + // However, when processing call edges *within* an SCC we cannot rely on this + // bottom-up behavior. As a consequence, with heavily connected *SCCs* of + // functions we can end up incrementally inlining N calls into each of + // N functions because each incremental inlining decision looks good and we + // don't have a topological ordering to prevent explosions. + // + // To compensate for this, we don't process transitive edges made immediate + // by inlining until we've done one pass of inlining across the entire SCC. + // Large, highly connected SCCs still lead to some amount of code bloat in + // this model, but it is uniformly spread across all the functions in the SCC + // and eventually they all become too large to inline, rather than + // incrementally maknig a single function grow in a super linear fashion. + std::unique_ptr>> Calls; + if (InlineEnablePriorityOrder) + Calls = std::make_unique(); + else + Calls = std::make_unique>>(); + assert(Calls != nullptr && "Expected an initialized InlineOrder"); + + // Populate the initial list of calls in this module. + for (Function &F : M) { + auto &ORE = FAM.getResult(F); + // We want to generally process call sites top-down in order for + // simplifications stemming from replacing the call with the returned value + // after inlining to be visible to subsequent inlining decisions. + // FIXME: Using instructions sequence is a really bad way to do this. + // Instead we should do an actual RPO walk of the function body. + for (Instruction &I : instructions(F)) + if (auto *CB = dyn_cast(&I)) + if (Function *Callee = CB->getCalledFunction()) { + if (!Callee->isDeclaration()) + Calls->push({CB, -1}); + else if (!isa(I)) { + using namespace ore; + setInlineRemark(*CB, "unavailable definition"); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) + << NV("Callee", Callee) << " will not be inlined into " + << NV("Caller", CB->getCaller()) + << " because its definition is unavailable" + << setIsVerbose(); + }); + } + } + } + if (Calls->empty()) + return PreservedAnalyses::all(); + + // When inlining a callee produces new call sites, we want to keep track of + // the fact that they were inlined from the callee. This allows us to avoid + // infinite inlining in some obscure cases. To represent this, we use an + // index into the InlineHistory vector. + SmallVector, 16> InlineHistory; + + // Track a set vector of inlined callees so that we can augment the caller + // with all of their edges in the call graph before pruning out the ones that + // got simplified away. + SmallSetVector InlinedCallees; + + // Track the dead functions to delete once finished with inlining calls. We + // defer deleting these to make it easier to handle the call graph updates. + SmallVector DeadFunctions; + + // Loop forward over all of the calls. + while (!Calls->empty()) { + // We expect the calls to typically be batched with sequences of calls that + // have the same caller, so we first set up some shared infrastructure for + // this caller. We also do any pruning we can at this layer on the caller + // alone. + Function &F = *Calls->front().first->getCaller(); + + LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n" + << " Function size: " << F.getInstructionCount() + << "\n"); + + auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { + return FAM.getResult(F); + }; + + // Now process as many calls as we have within this caller in the sequence. + // We bail out as soon as the caller has to change so we can update the + // call graph and prepare the context of that new caller. + bool DidInline = false; + while (!Calls->empty() && Calls->front().first->getCaller() == &F) { + auto P = Calls->pop(); + CallBase *CB = P.first; + const int InlineHistoryID = P.second; + Function &Callee = *CB->getCalledFunction(); + + if (InlineHistoryID != -1 && + inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) { + setInlineRemark(*CB, "recursive"); + continue; + } + + auto Advice = Advisor.getAdvice(*CB, OnlyMandatory); + // Check whether we want to inline this callsite. + if (!Advice->isInliningRecommended()) { + Advice->recordUnattemptedInlining(); + continue; + } + + // Setup the data structure used to plumb customization into the + // `InlineFunction` routine. + InlineFunctionInfo IFI( + /*cg=*/nullptr, GetAssumptionCache, PSI, + &FAM.getResult(*(CB->getCaller())), + &FAM.getResult(Callee)); + + InlineResult IR = + InlineFunction(*CB, IFI, &FAM.getResult(*CB->getCaller())); + if (!IR.isSuccess()) { + Advice->recordUnsuccessfulInlining(IR); + continue; + } + + DidInline = true; + InlinedCallees.insert(&Callee); + ++NumInlined; + + LLVM_DEBUG(dbgs() << " Size after inlining: " + << F.getInstructionCount() << "\n"); + + // Add any new callsites to defined functions to the worklist. + if (!IFI.InlinedCallSites.empty()) { + int NewHistoryID = InlineHistory.size(); + InlineHistory.push_back({&Callee, InlineHistoryID}); + + for (CallBase *ICB : reverse(IFI.InlinedCallSites)) { + Function *NewCallee = ICB->getCalledFunction(); + if (!NewCallee) { + // Try to promote an indirect (virtual) call without waiting for + // the post-inline cleanup and the next DevirtSCCRepeatedPass + // iteration because the next iteration may not happen and we may + // miss inlining it. + if (tryPromoteCall(*ICB)) + NewCallee = ICB->getCalledFunction(); + } + if (NewCallee) + if (!NewCallee->isDeclaration()) + Calls->push({ICB, NewHistoryID}); + } + } + + // Merge the attributes based on the inlining. + AttributeFuncs::mergeAttributesForInlining(F, Callee); + + // For local functions, check whether this makes the callee trivially + // dead. In that case, we can drop the body of the function eagerly + // which may reduce the number of callers of other functions to one, + // changing inline cost thresholds. + bool CalleeWasDeleted = false; + if (Callee.hasLocalLinkage()) { + // To check this we also need to nuke any dead constant uses (perhaps + // made dead by this operation on other functions). + Callee.removeDeadConstantUsers(); + // if (Callee.use_empty() && !CG.isLibFunction(Callee)) { + if (Callee.use_empty() && !isKnownLibFunction(Callee, GetTLI(Callee))) { + Calls->erase_if([&](const std::pair &Call) { + return Call.first->getCaller() == &Callee; + }); + // Clear the body and queue the function itself for deletion when we + // finish inlining. + // Note that after this point, it is an error to do anything other + // than use the callee's address or delete it. + Callee.dropAllReferences(); + assert(!is_contained(DeadFunctions, &Callee) && + "Cannot put cause a function to become dead twice!"); + DeadFunctions.push_back(&Callee); + CalleeWasDeleted = true; + } + } + if (CalleeWasDeleted) + Advice->recordInliningWithCalleeDeleted(); + else + Advice->recordInlining(); + } + + if (!DidInline) + continue; + Changed = true; + + InlinedCallees.clear(); + } + + // Now that we've finished inlining all of the calls across this module, + // delete all of the trivially dead functions. + // + // Note that this walks a pointer set which has non-deterministic order but + // that is OK as all we do is delete things and add pointers to unordered + // sets. + for (Function *DeadF : DeadFunctions) { + // Clear out any cached analyses. + FAM.clear(*DeadF, DeadF->getName()); + + // And delete the actual function from the module. + // The Advisor may use Function pointers to efficiently index various + // internal maps, e.g. for memoization. Function cleanup passes like + // argument promotion create new functions. It is possible for a new + // function to be allocated at the address of a deleted function. We could + // index using names, but that's inefficient. Alternatively, we let the + // Advisor free the functions when it sees fit. + DeadF->getBasicBlockList().clear(); + M.getFunctionList().remove(DeadF); + + ++NumDeleted; + } + + if (!Changed) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} \ No newline at end of file diff --git a/llvm/test/Transforms/Inline/callbr.ll b/llvm/test/Transforms/Inline/callbr.ll --- a/llvm/test/Transforms/Inline/callbr.ll +++ b/llvm/test/Transforms/Inline/callbr.ll @@ -1,5 +1,6 @@ ; RUN: opt -inline -S < %s | FileCheck %s ; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s +; RUN: opt -passes='module-inline' -S < %s | FileCheck %s define dso_local i32 @main() #0 { %1 = alloca i32, align 4 diff --git a/llvm/test/Transforms/Inline/casts.ll b/llvm/test/Transforms/Inline/casts.ll --- a/llvm/test/Transforms/Inline/casts.ll +++ b/llvm/test/Transforms/Inline/casts.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -inline -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s +; RUN: opt < %s -passes='module-inline' -S | FileCheck %s define i32 @testByte(i8 %X) { entry: diff --git a/llvm/test/Transforms/Inline/comdat-ipo.ll b/llvm/test/Transforms/Inline/comdat-ipo.ll --- a/llvm/test/Transforms/Inline/comdat-ipo.ll +++ b/llvm/test/Transforms/Inline/comdat-ipo.ll @@ -1,5 +1,6 @@ ; RUN: opt -inline -S < %s | FileCheck %s ; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s +; RUN: opt -passes='module-inline' -S < %s | FileCheck %s define i32 @caller() { ; CHECK-LABEL: @caller( diff --git a/llvm/test/Transforms/Inline/crash-lifetime-marker.ll b/llvm/test/Transforms/Inline/crash-lifetime-marker.ll --- a/llvm/test/Transforms/Inline/crash-lifetime-marker.ll +++ b/llvm/test/Transforms/Inline/crash-lifetime-marker.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -inline -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s +; RUN: opt < %s -passes='module-inline' -S | FileCheck %s ; InlineFunction would assert inside the loop that leaves lifetime markers if ; there was an zero-sized AllocaInst. Check that it doesn't assert and doesn't diff --git a/llvm/test/Transforms/Inline/frameescape.ll b/llvm/test/Transforms/Inline/frameescape.ll --- a/llvm/test/Transforms/Inline/frameescape.ll +++ b/llvm/test/Transforms/Inline/frameescape.ll @@ -1,5 +1,6 @@ ; RUN: opt -inline -S < %s | FileCheck %s ; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s +; RUN: opt -passes='module-inline' -S < %s | FileCheck %s ; PR23216: We can't inline functions using llvm.localescape. diff --git a/llvm/test/Transforms/Inline/inline-assume.ll b/llvm/test/Transforms/Inline/inline-assume.ll --- a/llvm/test/Transforms/Inline/inline-assume.ll +++ b/llvm/test/Transforms/Inline/inline-assume.ll @@ -1,5 +1,6 @@ ; RUN: opt -inline -S -o - < %s | FileCheck %s ; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s +; RUN: opt -passes='module-inline' -S < %s | FileCheck %s %0 = type opaque %struct.Foo = type { i32, %0* } diff --git a/llvm/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll b/llvm/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll --- a/llvm/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll +++ b/llvm/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll @@ -1,5 +1,6 @@ ; RUN: opt -S -inline < %s | FileCheck %s ; RUN: opt -S -passes='cgscc(inline)' < %s | FileCheck %s +; RUN: opt -S -passes='module-inline' < %s | FileCheck %s target datalayout = "e-p3:32:32-p4:64:64-n32" diff --git a/llvm/test/Transforms/Inline/inline-fast-math-flags.ll b/llvm/test/Transforms/Inline/inline-fast-math-flags.ll --- a/llvm/test/Transforms/Inline/inline-fast-math-flags.ll +++ b/llvm/test/Transforms/Inline/inline-fast-math-flags.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -S -inline -inline-threshold=20 | FileCheck %s ; RUN: opt < %s -S -passes='cgscc(inline)' -inline-threshold=20 | FileCheck %s +; RUN: opt < %s -S -passes='module-inline' -inline-threshold=20 | FileCheck %s ; Check that we don't drop FastMathFlag when estimating inlining profitability. ; ; In this test we should inline 'foo' to 'boo', because it'll fold to a diff --git a/llvm/test/Transforms/Inline/inline-vla.ll b/llvm/test/Transforms/Inline/inline-vla.ll --- a/llvm/test/Transforms/Inline/inline-vla.ll +++ b/llvm/test/Transforms/Inline/inline-vla.ll @@ -1,5 +1,6 @@ ; RUN: opt -S -inline %s -o - | FileCheck %s ; RUN: opt -S -passes='cgscc(inline)' %s -o - | FileCheck %s +; RUN: opt -S -passes='module-inline' %s -o - | FileCheck %s ; Check that memcpy2 is completely inlined away. ; CHECK-NOT: memcpy2 diff --git a/llvm/test/Transforms/Inline/invoke-cleanup.ll b/llvm/test/Transforms/Inline/invoke-cleanup.ll --- a/llvm/test/Transforms/Inline/invoke-cleanup.ll +++ b/llvm/test/Transforms/Inline/invoke-cleanup.ll @@ -1,5 +1,6 @@ ; RUN: opt %s -inline -S | FileCheck %s ; RUN: opt %s -passes='cgscc(inline)' -S | FileCheck %s +; RUN: opt %s -passes='module-inline' -S | FileCheck %s declare void @external_func() diff --git a/llvm/test/Transforms/Inline/invoke-combine-clauses.ll b/llvm/test/Transforms/Inline/invoke-combine-clauses.ll --- a/llvm/test/Transforms/Inline/invoke-combine-clauses.ll +++ b/llvm/test/Transforms/Inline/invoke-combine-clauses.ll @@ -1,4 +1,5 @@ ; RUN: opt %s -passes='cgscc(inline)' -S | FileCheck %s +; RUN: opt %s -passes='module-inline' -S | FileCheck %s declare void @external_func() declare void @abort() diff --git a/llvm/test/Transforms/Inline/invoke_test-1.ll b/llvm/test/Transforms/Inline/invoke_test-1.ll --- a/llvm/test/Transforms/Inline/invoke_test-1.ll +++ b/llvm/test/Transforms/Inline/invoke_test-1.ll @@ -3,6 +3,7 @@ ; RUN: opt < %s -inline -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s +; RUN: opt < %s -passes='module-inline' -S | FileCheck %s declare void @might_throw() diff --git a/llvm/test/Transforms/Inline/invoke_test-3.ll b/llvm/test/Transforms/Inline/invoke_test-3.ll --- a/llvm/test/Transforms/Inline/invoke_test-3.ll +++ b/llvm/test/Transforms/Inline/invoke_test-3.ll @@ -3,6 +3,7 @@ ; RUN: opt < %s -inline -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s +; RUN: opt < %s -passes='module-inline' -S | FileCheck %s declare void @might_throw() diff --git a/llvm/test/Transforms/Inline/nested-inline.ll b/llvm/test/Transforms/Inline/nested-inline.ll --- a/llvm/test/Transforms/Inline/nested-inline.ll +++ b/llvm/test/Transforms/Inline/nested-inline.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -inline -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s +; RUN: opt < %s -passes='module-inline' -S | FileCheck %s ; Test that bar and bar2 are both inlined throughout and removed. @A = weak global i32 0 ; [#uses=1] @B = weak global i32 0 ; [#uses=1] diff --git a/llvm/test/Transforms/Inline/nonnull.ll b/llvm/test/Transforms/Inline/nonnull.ll --- a/llvm/test/Transforms/Inline/nonnull.ll +++ b/llvm/test/Transforms/Inline/nonnull.ll @@ -1,5 +1,6 @@ ; RUN: opt -S -inline %s | FileCheck %s ; RUN: opt -S -passes='cgscc(inline)' %s | FileCheck %s +; RUN: opt -S -passes='module-inline' %s | FileCheck %s declare void @foo() declare void @bar() diff --git a/llvm/test/Transforms/Inline/pr21206.ll b/llvm/test/Transforms/Inline/pr21206.ll --- a/llvm/test/Transforms/Inline/pr21206.ll +++ b/llvm/test/Transforms/Inline/pr21206.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -inline -S | FileCheck %s ; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s +; RUN: opt < %s -passes='module-inline' -S | FileCheck %s $c = comdat any ; CHECK: $c = comdat any