diff --git a/llvm/include/llvm/Analysis/InliningAdvisor.h b/llvm/include/llvm/Analysis/InliningAdvisor.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Analysis/InliningAdvisor.h @@ -0,0 +1,227 @@ +//===- InliningAdvisor.h - Inlining decision making abstraction -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_INLININGADVISOR_H_ +#define LLVM_INLININGADVISOR_H_ + +#include +#include +#include + +#include "llvm/Analysis/InlineCost.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { +class BasicBlock; +class CallBase; +class Function; +class Module; +class PreservedAnalyses; +class OptimizationRemarkEmitter; + +/// There are 3 scenarios we can use the InliningAdvisor: +/// - Default - use manual heuristics. +/// +/// - Release mode, the expected mode for production, day to day deployments. +/// In this mode, when building the compiler, we also compile a pre-trained ML +/// model to native code, and link it as a static library. This mode has low +/// overhead and no additional dependencies for the compiler runtime. +/// +/// - Development mode, for training new models. +/// In this mode, we trade off runtime performance for flexibility. This mode +/// requires the full C Tensorflow API library, and evaluates models +/// dynamically. This mode also permits generating training logs, for offline +/// training. +enum class InliningAdvisorMode : int { Default, Release, Development }; + +class InliningAdvisor; +/// Capture state between an inlining decision having had been made, and +/// its impact being observable. When collecting model training data, this +/// allows recording features/decisions/partial reward data sets. +/// +/// Derivations of this type are expected to be tightly coupled with their +/// InliningAdvisors. The base type implements the minimal contractual +/// obligations. +class PendingInliningRecord { +public: + PendingInliningRecord(PendingInliningRecord &&) = delete; + PendingInliningRecord(const PendingInliningRecord &) = delete; + virtual ~PendingInliningRecord() { + assert(Recorded && "PendingInliningRecord should have been informed of the " + "inliner's decision in all cases"); + } + + /// Exactly one of the record* APIs must be called. Implementers may extend + /// behavior by implementing the corresponding record*Impl. + /// + /// Call after inlining succeeded, and did not result in deleting the callee. + void recordInlining() { + markRecorded(); + recordInliningImpl(); + } + + /// Call after inlining succeeded, and resulted in deleting the callee. + void recordInliningWithCalleeDeleted(); + + /// Call after the decision for a call site was to not inline. + void recordUnsuccessfulInlining(const InlineResult &Result) { + markRecorded(); + recordUnsuccessfulInliningImpl(Result); + } + + /// Call to indicate inlining was not attempted. + void recordUnattemptedInlining() { + markRecorded(); + recordUnattemptedInliningImpl(); + } + + /// Get the inlining recommendation. 
+ bool isInliningRecommended() const { return IsInliningRecommended; } + +protected: + PendingInliningRecord(InliningAdvisor *Advisor, CallBase &CB, + bool IsInliningRecommended); + + virtual void recordInliningImpl(){}; + virtual void recordInliningWithCalleeDeletedImpl() {} + virtual void recordUnsuccessfulInliningImpl(const InlineResult &Result) {} + virtual void recordUnattemptedInliningImpl() {} + + InliningAdvisor *const Advisor; + /// Caller and Callee are pre-inlining. + Function *const Caller; + Function *const Callee; + const bool IsInliningRecommended; + +private: + void markRecorded() { + assert(!Recorded && "Recording should happen exactly once"); + Recorded = true; + } + + bool Recorded = false; +}; + +/// Interface for deciding whether to inline a call site or not. +class InliningAdvisor { +public: + InliningAdvisor(InliningAdvisor &&) = delete; + virtual ~InliningAdvisor() { freeDeletedFunctions(); }; + + /// Get a PendingInliningRecord containing a recommendation on whether to + /// inline or not. \p CB is assumed to be a direct call. \p FAM is assumed to + /// be up-to-date wrt previous inlining decisions. + /// Returns a PendingInliningRecord with the inlining recommendation. + virtual std::unique_ptr + getInliningAdvice(CallBase &CB, FunctionAnalysisManager &FAM) = 0; + + /// This must be called when the Inliner pass is entered, to allow the + /// InliningAdvisor update internal state, as result of function passes run + /// between Inliner pass runs (for the same module). + virtual void OnPassEntry(){}; + + /// This must be called when the Inliner pass is exited, as function passes + /// may be run subsequently. This allows an implementation of InliningAdvisor + /// to prepare for a partial update. + virtual void OnPassExit(){}; + +protected: + InliningAdvisor() = default; + + /// We may want to defer deleting functions to after the inlining for a whole + /// module has finished. This allows us to reliably use function pointers as + /// unique identifiers, as an efficient implementation detail of the + /// InliningAdvisor. Otherwise, it is possible the memory allocator + /// re-allocate Function objects at the same address of a deleted Function; + /// and Functions are potentially created during the function passes called + /// after each SCC inlining (e.g. argument promotion does that). + void freeDeletedFunctions(); + + bool isFunctionDeleted(Function *F) const { + return DeletedFunctions.count(F); + } + +private: + friend class PendingInliningRecord; + void markFunctionAsDeleted(Function *F); + std::unordered_set DeletedFunctions; +}; + +/// The default (manual heuristics) implementation of the InliningAdvisor. This +/// implementation does not need to keep state between inliner pass runs, and is +/// reusable as-is for inliner pass test scenarios, as well as for regular use. +class DefaultInliningAdvisor : public InliningAdvisor { +public: + DefaultInliningAdvisor(InlineParams Params) : Params(Params) {} + +private: + std::unique_ptr + getInliningAdvice(CallBase &CB, FunctionAnalysisManager &FAM) override; + + void OnPassExit() override { freeDeletedFunctions(); } + InlineParams Params; +}; + +/// The InliningAdvisorAnalysis is a module pass because the InliningAdvisor +/// needs to capture state right before inlining commences over a module. 
+class InliningAdvisorAnalysis + : public AnalysisInfoMixin { +public: + static AnalysisKey Key; + InliningAdvisorAnalysis() = default; + struct Result { + Result(Module &M, ModuleAnalysisManager &MAM) : M(M), MAM(MAM) {} + bool invalidate(Module &, const PreservedAnalyses &, + ModuleAnalysisManager::Invalidator &) { + // InliningAdvisor must be preserved across analysis invalidations. + return false; + } + bool tryCreate(InlineParams Params, InliningAdvisorMode Mode); + InliningAdvisor *getAdvisor() const { return Advisor.get(); } + void clear() { Advisor.reset(); } + + private: + Module &M; + ModuleAnalysisManager &MAM; + std::unique_ptr Advisor; + }; + + Result run(Module &M, ModuleAnalysisManager &MAM) { return Result(M, MAM); } +}; + +// Default (manual policy) decision making helper APIs. Shared with the legacy +// pass manager inliner. + +/// Return true if inlining of CB can block the caller from being +/// inlined which is proved to be more beneficial. \p IC is the +/// estimated inline cost associated with callsite \p CB. +/// \p TotalSecondaryCost will be set to the estimated cost of inlining the +/// caller if \p CB is suppressed for inlining. +bool shouldBeDeferred(Function *Caller, InlineCost IC, int &TotalSecondaryCost, + function_ref GetInlineCost); + +/// Return the cost only if the inliner should attempt to inline at the given +/// CallSite. If we return the cost, we will emit an optimisation remark later +/// using that cost, so we won't do so from this function. Return None if +/// inlining should not be attempted. +Optional +shouldInline(CallBase &CB, function_ref GetInlineCost, + OptimizationRemarkEmitter &ORE); + +/// Emit ORE message. +void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, + const BasicBlock *Block, const Function &Callee, + const Function &Caller, const InlineCost &IC); + +/// Set the inline-remark attribute. +void setInlineRemark(CallBase &CB, StringRef Message); + +/// Utility for extracting the inline cost message to a string. +std::string inlineCostStr(const InlineCost &IC); +} // namespace llvm +#endif // LLVM_INLININGADVISOR_H_ diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -19,6 +19,7 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/Error.h" +#include "llvm/Transforms/IPO/Inliner.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include @@ -345,9 +346,9 @@ /// Construct the module pipeline that performs inlining as well as /// the inlining-driven cleanups. - ModulePassManager buildInlinerPipeline(OptimizationLevel Level, - ThinLTOPhase Phase, - bool DebugLogging = false); + ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, + ThinLTOPhase Phase, + bool DebugLogging = false); /// Construct the core LLVM module optimization pipeline. 
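To make the contract of the new header concrete, here is a minimal sketch of how a client such as InlinerPass is expected to drive InliningAdvisor and PendingInliningRecord. This is reviewer illustration only, distilled from the doc comments above and the Inliner.cpp changes further down in the patch; it is not part of the patch, and CB, FAM, IFI, and CalleeWasDeleted stand in for the caller's own state:

  // Reviewer sketch (not patch content): ask the advisor for a recommendation;
  // exactly one record* call must follow before the record is destroyed.
  std::unique_ptr<PendingInliningRecord> Record =
      Advisor.getInliningAdvice(*CB, FAM);
  if (!Record->isInliningRecommended()) {
    Record->recordUnattemptedInlining();
    return;
  }
  InlineResult IR = InlineFunction(*CB, IFI);
  if (!IR.isSuccess()) {
    Record->recordUnsuccessfulInlining(IR);
    return;
  }
  // Report which of the two success outcomes happened, so the advisor can
  // track deleted callees (and, later, training data) correctly.
  if (CalleeWasDeleted)
    Record->recordInliningWithCalleeDeleted();
  else
    Record->recordInlining();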
/// diff --git a/llvm/include/llvm/Transforms/IPO/Inliner.h b/llvm/include/llvm/Transforms/IPO/Inliner.h --- a/llvm/include/llvm/Transforms/IPO/Inliner.h +++ b/llvm/include/llvm/Transforms/IPO/Inliner.h @@ -12,6 +12,7 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/InliningAdvisor.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/IR/PassManager.h" #include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" @@ -93,21 +94,47 @@ /// passes be composed to achieve the same end result. class InlinerPass : public PassInfoMixin { public: - InlinerPass(InlineParams Params = getInlineParams()) - : Params(std::move(Params)) {} + InlinerPass() = default; ~InlinerPass(); InlinerPass(InlinerPass &&Arg) - : Params(std::move(Arg.Params)), - ImportedFunctionsStats(std::move(Arg.ImportedFunctionsStats)) {} + : ImportedFunctionsStats(std::move(Arg.ImportedFunctionsStats)) {} PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); private: - InlineParams Params; + InliningAdvisor &getAdvisor(const ModuleAnalysisManager &MAM, Module &M); std::unique_ptr ImportedFunctionsStats; + Optional OwnedDefaultAdvisor; }; +/// Module pass, wrapping the inliner pass. This works in conjunction with the +/// InliningAdvisorAnalysis to facilitate inlining decisions taking into account +/// module-wide state, that need to keep track of inter-inliner pass runs, for +/// a given module. An InliningAdvisor is configured and kept alive for the +/// duration of the ModuleInlinerWrapperPass::run. +class ModuleInlinerWrapperPass + : public PassInfoMixin { +public: + ModuleInlinerWrapperPass( + InlineParams Params = getInlineParams(), bool Debugging = false, + InliningAdvisorMode Mode = InliningAdvisorMode::Default, + unsigned MaxDevirtIterations = 0); + ModuleInlinerWrapperPass(ModuleInlinerWrapperPass &&Arg) = default; + + PreservedAnalyses run(Module &, ModuleAnalysisManager &); + + /// Allow adding more CGSCC passes, besides inlining. This should be called + /// before run is called, as part of pass pipeline building. + CGSCCPassManager &getPM() { return PM; } + +private: + const InlineParams Params; + const InliningAdvisorMode Mode; + const unsigned MaxDevirtIterations; + const bool Debugging; + CGSCCPassManager PM; +}; } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_INLINER_H diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -40,6 +40,7 @@ IVUsers.cpp IndirectCallPromotionAnalysis.cpp InlineCost.cpp + InliningAdvisor.cpp InstCount.cpp InstructionPrecedenceTracking.cpp InstructionSimplify.cpp diff --git a/llvm/lib/Analysis/InliningAdvisor.cpp b/llvm/lib/Analysis/InliningAdvisor.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Analysis/InliningAdvisor.cpp @@ -0,0 +1,377 @@ +//===- InliningAdvisor.cpp - analysis pass implementation -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements InliningAdvisorAnalysis and DefaultInliningAdvisor, and +// related types. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InliningAdvisor.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/raw_ostream.h" + +#include + +using namespace llvm; +#define DEBUG_TYPE "inline" + +// This weirdly named statistic tracks the number of times that, when attempting +// to inline a function A into B, we analyze the callers of B in order to see +// if those would be more profitable and blocked inline steps. +STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); + +/// Flag to add inline messages as callsite attributes 'inline-remark'. +static cl::opt + InlineRemarkAttribute("inline-remark-attribute", cl::init(false), + cl::Hidden, + cl::desc("Enable adding inline-remark attribute to" + " callsites processed by inliner but decided" + " to be not inlined")); + +// An integer used to limit the cost of inline deferral. The default negative +// number tells shouldBeDeferred to only take the secondary cost into account. +static cl::opt + InlineDeferralScale("inline-deferral-scale", + cl::desc("Scale to limit the cost of inline deferral"), + cl::init(-1), cl::Hidden); + +namespace llvm { +std::basic_ostream &operator<<(std::basic_ostream &R, + const ore::NV &Arg) { + return R << Arg.Val; +} + +template +RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) { + using namespace ore; + if (IC.isAlways()) { + R << "(cost=always)"; + } else if (IC.isNever()) { + R << "(cost=never)"; + } else { + R << "(cost=" << ore::NV("Cost", IC.getCost()) + << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")"; + } + if (const char *Reason = IC.getReason()) + R << ": " << ore::NV("Reason", Reason); + return R; +} +} // namespace llvm + +namespace { +class DefaultPendingRecord : public PendingInliningRecord { +public: + DefaultPendingRecord(DefaultInliningAdvisor *Advisor, CallBase &CB, + Optional OIC, OptimizationRemarkEmitter &ORE) + : PendingInliningRecord(Advisor, CB, OIC.hasValue()), OriginalCB(&CB), + OIC(OIC), ORE(ORE), DLoc(CB.getDebugLoc()), Block(CB.getParent()) {} + +private: + void recordUnsuccessfulInliningImpl(const InlineResult &Result) override { + using namespace ore; + llvm::setInlineRemark(*OriginalCB, std::string(Result.getFailureReason()) + + "; " + inlineCostStr(*OIC)); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) + << NV("Callee", Callee) << " will not be inlined into " + << NV("Caller", Caller) << ": " + << NV("Reason", Result.getFailureReason()); + }); + } + + void recordInliningWithCalleeDeletedImpl() override { + emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC); + } + + void recordInliningImpl() override { + emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC); + } + +private: + CallBase *const OriginalCB; + Optional OIC; + OptimizationRemarkEmitter &ORE; + + // Capture the context of CB before inlining, as a successful inlining may + // change that context, and we want to report success or failure in the + // original context. 
+ const DebugLoc DLoc; + const BasicBlock *const Block; +}; + +} // namespace + +std::unique_ptr +DefaultInliningAdvisor::getInliningAdvice(CallBase &CB, + FunctionAnalysisManager &FAM) { + Function &Callee = *CB.getCalledFunction(); + Function &F = *CB.getCaller(); + ProfileSummaryInfo *PSI = FAM.getResult(F) + .getManager() + .getCachedResult( + *CB.getParent()->getParent()->getParent()); + + auto &ORE = FAM.getResult(F); + std::function GetAssumptionCache = + [&](Function &F) -> AssumptionCache & { + return FAM.getResult(Callee); + }; + auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & { + return FAM.getResult(F); + }; + auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { + return FAM.getResult(F); + }; + + auto GetInlineCost = [&](CallBase &CB) { + Function &Callee = *CB.getCalledFunction(); + auto &CalleeTTI = FAM.getResult(Callee); + bool RemarksEnabled = + Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled( + DEBUG_TYPE); + return getInlineCost(CB, Params, CalleeTTI, GetAssumptionCache, {GetBFI}, + GetTLI, PSI, RemarksEnabled ? &ORE : nullptr); + }; + auto OIC = llvm::shouldInline(CB, GetInlineCost, ORE); + std::unique_ptr Ret( + new DefaultPendingRecord(this, CB, OIC, ORE)); + return Ret; +} + +PendingInliningRecord::PendingInliningRecord(InliningAdvisor *Advisor, + CallBase &CB, + bool IsInliningRecommended) + : Advisor(Advisor), Caller(CB.getCaller()), Callee(CB.getCalledFunction()), + IsInliningRecommended(IsInliningRecommended) {} + +void InliningAdvisor::markFunctionAsDeleted(Function *F) { + assert((!DeletedFunctions.count(F)) && + "Cannot put cause a function to become dead twice!"); + DeletedFunctions.insert(F); +} + +void InliningAdvisor::freeDeletedFunctions() { + for (auto *F : DeletedFunctions) + delete (F); + DeletedFunctions.clear(); +} + +void PendingInliningRecord::recordInliningWithCalleeDeleted() { + markRecorded(); + Advisor->markFunctionAsDeleted(Callee); + recordInliningWithCalleeDeletedImpl(); +} + +AnalysisKey InliningAdvisorAnalysis::Key; + +bool InliningAdvisorAnalysis::Result::tryCreate(InlineParams Params, + InliningAdvisorMode Mode) { + switch (Mode) { + case InliningAdvisorMode::Default: + Advisor.reset(new DefaultInliningAdvisor(Params)); + break; + case InliningAdvisorMode::Development: + // To be added subsequently under conditional compilation. + break; + case InliningAdvisorMode::Release: + // To be added subsequently under conditional compilation. + break; + } + return !!Advisor; +} + +void llvm::emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, + const BasicBlock *Block, const Function &Callee, + const Function &Caller, const InlineCost &IC) { + using namespace ore; + ORE.emit([&]() { + bool AlwaysInline = IC.isAlways(); + StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined"; + return OptimizationRemark(DEBUG_TYPE, RemarkName, DLoc, Block) + << ore::NV("Callee", &Callee) << " inlined into " + << ore::NV("Caller", &Caller) << " with " << IC; + }); +} + +/// Return true if inlining of CB can block the caller from being +/// inlined which is proved to be more beneficial. \p IC is the +/// estimated inline cost associated with callsite \p CB. +/// \p TotalSecondaryCost will be set to the estimated cost of inlining the +/// caller if \p CB is suppressed for inlining. +bool llvm::shouldBeDeferred( + Function *Caller, InlineCost IC, int &TotalSecondaryCost, + function_ref GetInlineCost) { + // For now we only handle local or inline functions. 
+ if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage()) + return false; + // If the cost of inlining CB is non-positive, it is not going to prevent the + // caller from being inlined into its callers and hence we don't need to + // defer. + if (IC.getCost() <= 0) + return false; + // Try to detect the case where the current inlining candidate caller (call + // it B) is a static or linkonce-ODR function and is an inlining candidate + // elsewhere, and the current candidate callee (call it C) is large enough + // that inlining it into B would make B too big to inline later. In these + // circumstances it may be best not to inline C into B, but to inline B into + // its callers. + // + // This only applies to static and linkonce-ODR functions because those are + // expected to be available for inlining in the translation units where they + // are used. Thus we will always have the opportunity to make local inlining + // decisions. Importantly the linkonce-ODR linkage covers inline functions + // and templates in C++. + // + // FIXME: All of this logic should be sunk into getInlineCost. It relies on + // the internal implementation of the inline cost metrics rather than + // treating them as truly abstract units etc. + TotalSecondaryCost = 0; + // The candidate cost to be imposed upon the current function. + int CandidateCost = IC.getCost() - 1; + // If the caller has local linkage and can be inlined to all its callers, we + // can apply a huge negative bonus to TotalSecondaryCost. + bool ApplyLastCallBonus = Caller->hasLocalLinkage() && !Caller->hasOneUse(); + // This bool tracks what happens if we DO inline C into B. + bool InliningPreventsSomeOuterInline = false; + unsigned NumCallerUsers = 0; + for (User *U : Caller->users()) { + CallBase *CS2 = dyn_cast(U); + + // If this isn't a call to Caller (it could be some other sort + // of reference) skip it. Such references will prevent the caller + // from being removed. + if (!CS2 || CS2->getCalledFunction() != Caller) { + ApplyLastCallBonus = false; + continue; + } + + InlineCost IC2 = GetInlineCost(*CS2); + ++NumCallerCallersAnalyzed; + if (!IC2) { + ApplyLastCallBonus = false; + continue; + } + if (IC2.isAlways()) + continue; + + // See if inlining of the original callsite would erase the cost delta of + // this callsite. We subtract off the penalty for the call instruction, + // which we would be deleting. + if (IC2.getCostDelta() <= CandidateCost) { + InliningPreventsSomeOuterInline = true; + TotalSecondaryCost += IC2.getCost(); + NumCallerUsers++; + } + } + + if (!InliningPreventsSomeOuterInline) + return false; + + // If all outer calls to Caller would get inlined, the cost for the last + // one is set very low by getInlineCost, in anticipation that Caller will + // be removed entirely. We did not account for this above unless there + // is only one caller of Caller. + if (ApplyLastCallBonus) + TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus; + + // If InlineDeferralScale is negative, then ignore the cost of primary + // inlining -- IC.getCost() multiplied by the number of callers to Caller. 
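  // (Reviewer illustration, not patch content.) Worked example of the check
  // below, under assumed numbers: IC.getCost() == 100, and Caller has two
  // outer call sites, each with cost 80, that inlining CB would block
  // (TotalSecondaryCost == 160, NumCallerUsers == 2). With the default
  // -inline-deferral-scale=-1 the test is 160 < 100, so we do not defer;
  // with -inline-deferral-scale=4 it is 160 + 100*2 = 360 < 100*4 = 400,
  // so we do defer.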
+ if (InlineDeferralScale < 0) + return TotalSecondaryCost < IC.getCost(); + + int TotalCost = TotalSecondaryCost + IC.getCost() * NumCallerUsers; + int Allowance = IC.getCost() * InlineDeferralScale; + return TotalCost < Allowance; +} + +void llvm::setInlineRemark(CallBase &CB, StringRef Message) { + if (!InlineRemarkAttribute) + return; + + Attribute Attr = Attribute::get(CB.getContext(), "inline-remark", Message); + CB.addAttribute(AttributeList::FunctionIndex, Attr); +} + +/// Return the cost only if the inliner should attempt to inline at the given +/// CallSite. If we return the cost, we will emit an optimisation remark later +/// using that cost, so we won't do so from this function. Return None if +/// inlining should not be attempted. +Optional +llvm::shouldInline(CallBase &CB, + function_ref GetInlineCost, + OptimizationRemarkEmitter &ORE) { + using namespace ore; + + InlineCost IC = GetInlineCost(CB); + Instruction *Call = &CB; + Function *Callee = CB.getCalledFunction(); + Function *Caller = CB.getCaller(); + + if (IC.isAlways()) { + LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) + << ", Call: " << CB << "\n"); + return IC; + } + + if (!IC) { + LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC) + << ", Call: " << CB << "\n"); + if (IC.isNever()) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) + << NV("Callee", Callee) << " not inlined into " + << NV("Caller", Caller) << " because it should never be inlined " + << IC; + }); + } else { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call) + << NV("Callee", Callee) << " not inlined into " + << NV("Caller", Caller) << " because too costly to inline " + << IC; + }); + } + setInlineRemark(CB, inlineCostStr(IC)); + return None; + } + + int TotalSecondaryCost = 0; + if (shouldBeDeferred(Caller, IC, TotalSecondaryCost, GetInlineCost)) { + LLVM_DEBUG(dbgs() << " NOT Inlining: " << CB + << " Cost = " << IC.getCost() + << ", outer Cost = " << TotalSecondaryCost << '\n'); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts", + Call) + << "Not inlining. Cost of inlining " << NV("Callee", Callee) + << " increases the cost of inlining " << NV("Caller", Caller) + << " in other contexts"; + }); + setInlineRemark(CB, "deferred"); + // IC does not bool() to false, so get an InlineCost that will. + // This will not be inspected to make an error message. + return None; + } + + LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) << ", Call: " << CB + << '\n'); + return IC; +} + +std::string llvm::inlineCostStr(const InlineCost &IC) { + std::stringstream Remark; + Remark << IC; + return Remark.str(); +} diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -33,6 +33,7 @@ #include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/InliningAdvisor.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/LoopAccessAnalysis.h" @@ -215,6 +216,16 @@ "enable-npm-gvn-hoist", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); +static cl::opt UseInliningAdvisor( + "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, + cl::desc("Enable ML policy for inliner. 
Currently trained for -Oz only"), + cl::values(clEnumValN(InliningAdvisorMode::Default, "default", + "Heuristics-based inliner version."), + clEnumValN(InliningAdvisorMode::Development, "development", + "Use development mode (runtime-loadable model)."), + clEnumValN(InliningAdvisorMode::Release, "release", + "Use release mode (AOT-compiled model)."))); + static cl::opt EnableGVNSink( "enable-npm-gvn-sink", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); @@ -611,10 +622,8 @@ // This should probably be lowered after performance testing. // FIXME: this comment is cargo culted from the old pass manager, revisit). IP.HintThreshold = 325; - - CGSCCPassManager CGPipeline(DebugLogging); - - CGPipeline.addPass(InlinerPass(IP)); + ModuleInlinerWrapperPass MIWP(IP, DebugLogging); + CGSCCPassManager &CGPipeline = MIWP.getPM(); FunctionPassManager FPM; FPM.addPass(SROA()); @@ -625,7 +634,7 @@ CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); - MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPipeline))); + MPM.addPass(std::move(MIWP)); // Delete anything that is now dead to make sure that we don't instrument // dead code. Instrumentation can end up keeping dead code around and @@ -690,33 +699,28 @@ return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel()); } -ModulePassManager PassBuilder::buildInlinerPipeline(OptimizationLevel Level, - ThinLTOPhase Phase, - bool DebugLogging) { - ModulePassManager MPM(DebugLogging); +ModuleInlinerWrapperPass +PassBuilder::buildInlinerPipeline(OptimizationLevel Level, ThinLTOPhase Phase, + bool DebugLogging) { + InlineParams IP = getInlineParamsFromOptLevel(Level); + if (Phase == PassBuilder::ThinLTOPhase::PreLink && PGOOpt && + PGOOpt->Action == PGOOptions::SampleUse) + IP.HotCallSiteThreshold = 0; + + ModuleInlinerWrapperPass MIWP(IP, DebugLogging, UseInliningAdvisor, + MaxDevirtIterations); // Now begin the main postorder CGSCC pipeline. // FIXME: The current CGSCC pipeline has its origins in the legacy pass // manager and trying to emulate its precise behavior. Much of this doesn't // make a lot of sense and we should revisit the core CGSCC structure. - CGSCCPassManager MainCGPipeline(DebugLogging); + CGSCCPassManager &MainCGPipeline = MIWP.getPM(); // Note: historically, the PruneEH pass was run first to deduce nounwind and // generally clean up exception handling overhead. It isn't clear this is // valuable as the inliner doesn't currently care whether it is inlining an // invoke or a call. - // Run the inliner first. The theory is that we are walking bottom-up and so - // the callees have already been fully optimized, and we want to inline them - // into the callers so that our optimizations can reflect that. - // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO - // because it makes profile annotation in the backend inaccurate. - InlineParams IP = getInlineParamsFromOptLevel(Level); - if (Phase == ThinLTOPhase::PreLink && PGOOpt && - PGOOpt->Action == PGOOptions::SampleUse) - IP.HotCallSiteThreshold = 0; - MainCGPipeline.addPass(InlinerPass(IP)); - if (AttributorRun & AttributorRunOption::CGSCC) MainCGPipeline.addPass(AttributorCGSCCPass()); @@ -744,15 +748,7 @@ for (auto &C : CGSCCOptimizerLateEPCallbacks) C(MainCGPipeline, Level); - // We wrap the CGSCC pipeline in a devirtualization repeater. 
This will try - // to detect when we devirtualize indirect calls and iterate the SCC passes - // in that case to try and catch knock-on inlining or function attrs - // opportunities. Then we add it to the module pipeline by walking the SCCs - // in postorder (or bottom-up). - MPM.addPass( - createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass( - std::move(MainCGPipeline), MaxDevirtIterations))); - return MPM; + return MIWP; } ModulePassManager PassBuilder::buildModuleSimplificationPipeline( @@ -1327,8 +1323,8 @@ // valuable as the inliner doesn't currently care whether it is inlining an // invoke or a call. // Run the inliner now. - MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( - InlinerPass(getInlineParamsFromOptLevel(Level)))); + MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level), + DebugLogging)); // Optimize globals again after we ran the inliner. MPM.addPass(GlobalOptPass()); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -27,6 +27,7 @@ MODULE_ANALYSIS("verify", VerifierAnalysis()) MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) MODULE_ANALYSIS("asan-globals-md", ASanGlobalsMetadataAnalysis()) +MODULE_ANALYSIS("inlining-advisor", InliningAdvisorAnalysis()) #ifndef MODULE_ALIAS_ANALYSIS #define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ @@ -57,6 +58,7 @@ MODULE_PASS("hwasan", HWAddressSanitizerPass(false, false)) MODULE_PASS("khwasan", HWAddressSanitizerPass(true, true)) MODULE_PASS("inferattrs", InferFunctionAttrsPass()) +MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass()) MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass()) MODULE_PASS("instrorderfile", InstrOrderFilePass()) MODULE_PASS("instrprof", InstrProfiling()) diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -29,6 +30,7 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/InliningAdvisor.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" @@ -77,11 +79,6 @@ STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); STATISTIC(NumMergedAllocas, "Number of allocas merged together"); -// This weirdly named statistic tracks the number of times that, when attempting -// to inline a function A into B, we analyze the callers of B in order to see -// if those would be more profitable and blocked inline steps. -STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); - /// Flag to disable manual alloca merging. /// /// Merging of allocas was originally done as a stack-size saving technique @@ -93,13 +90,6 @@ DisableInlinedAllocaMerging("disable-inlined-alloca-merging", cl::init(false), cl::Hidden); -// An integer used to limit the cost of inline deferral. The default negative -// number tells shouldBeDeferred to only take the secondary cost into account. 
-static cl::opt - InlineDeferralScale("inline-deferral-scale", - cl::desc("Scale to limit the cost of inline deferral"), - cl::init(-1), cl::Hidden); - namespace { enum class InlinerFunctionImportStatsOpts { @@ -119,14 +109,6 @@ "printing of statistics for each inlined function")), cl::Hidden, cl::desc("Enable inliner stats for imported functions")); -/// Flag to add inline messages as callsite attributes 'inline-remark'. -static cl::opt - InlineRemarkAttribute("inline-remark-attribute", cl::init(false), - cl::Hidden, - cl::desc("Enable adding inline-remark attribute to" - " callsites processed by inliner but decided" - " to be not inlined")); - LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {} LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime) @@ -305,197 +287,6 @@ return IR; // success } -/// Return true if inlining of CB can block the caller from being -/// inlined which is proved to be more beneficial. \p IC is the -/// estimated inline cost associated with callsite \p CB. -/// \p TotalSecondaryCost will be set to the estimated cost of inlining the -/// caller if \p CB is suppressed for inlining. -static bool -shouldBeDeferred(Function *Caller, InlineCost IC, int &TotalSecondaryCost, - function_ref GetInlineCost) { - // For now we only handle local or inline functions. - if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage()) - return false; - // If the cost of inlining CB is non-positive, it is not going to prevent the - // caller from being inlined into its callers and hence we don't need to - // defer. - if (IC.getCost() <= 0) - return false; - // Try to detect the case where the current inlining candidate caller (call - // it B) is a static or linkonce-ODR function and is an inlining candidate - // elsewhere, and the current candidate callee (call it C) is large enough - // that inlining it into B would make B too big to inline later. In these - // circumstances it may be best not to inline C into B, but to inline B into - // its callers. - // - // This only applies to static and linkonce-ODR functions because those are - // expected to be available for inlining in the translation units where they - // are used. Thus we will always have the opportunity to make local inlining - // decisions. Importantly the linkonce-ODR linkage covers inline functions - // and templates in C++. - // - // FIXME: All of this logic should be sunk into getInlineCost. It relies on - // the internal implementation of the inline cost metrics rather than - // treating them as truly abstract units etc. - TotalSecondaryCost = 0; - // The candidate cost to be imposed upon the current function. - int CandidateCost = IC.getCost() - 1; - // If the caller has local linkage and can be inlined to all its callers, we - // can apply a huge negative bonus to TotalSecondaryCost. - bool ApplyLastCallBonus = Caller->hasLocalLinkage() && !Caller->hasOneUse(); - // This bool tracks what happens if we DO inline C into B. - bool InliningPreventsSomeOuterInline = false; - unsigned NumCallerUsers = 0; - for (User *U : Caller->users()) { - CallBase *CS2 = dyn_cast(U); - - // If this isn't a call to Caller (it could be some other sort - // of reference) skip it. Such references will prevent the caller - // from being removed. 
- if (!CS2 || CS2->getCalledFunction() != Caller) { - ApplyLastCallBonus = false; - continue; - } - - InlineCost IC2 = GetInlineCost(*CS2); - ++NumCallerCallersAnalyzed; - if (!IC2) { - ApplyLastCallBonus = false; - continue; - } - if (IC2.isAlways()) - continue; - - // See if inlining of the original callsite would erase the cost delta of - // this callsite. We subtract off the penalty for the call instruction, - // which we would be deleting. - if (IC2.getCostDelta() <= CandidateCost) { - InliningPreventsSomeOuterInline = true; - TotalSecondaryCost += IC2.getCost(); - NumCallerUsers++; - } - } - - if (!InliningPreventsSomeOuterInline) - return false; - - // If all outer calls to Caller would get inlined, the cost for the last - // one is set very low by getInlineCost, in anticipation that Caller will - // be removed entirely. We did not account for this above unless there - // is only one caller of Caller. - if (ApplyLastCallBonus) - TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus; - - // If InlineDeferralScale is negative, then ignore the cost of primary - // inlining -- IC.getCost() multiplied by the number of callers to Caller. - if (InlineDeferralScale < 0) - return TotalSecondaryCost < IC.getCost(); - - int TotalCost = TotalSecondaryCost + IC.getCost() * NumCallerUsers; - int Allowance = IC.getCost() * InlineDeferralScale; - return TotalCost < Allowance; -} - -static std::basic_ostream &operator<<(std::basic_ostream &R, - const ore::NV &Arg) { - return R << Arg.Val; -} - -template -RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) { - using namespace ore; - if (IC.isAlways()) { - R << "(cost=always)"; - } else if (IC.isNever()) { - R << "(cost=never)"; - } else { - R << "(cost=" << ore::NV("Cost", IC.getCost()) - << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")"; - } - if (const char *Reason = IC.getReason()) - R << ": " << ore::NV("Reason", Reason); - return R; -} - -static std::string inlineCostStr(const InlineCost &IC) { - std::stringstream Remark; - Remark << IC; - return Remark.str(); -} - -static void setInlineRemark(CallBase &CB, StringRef Message) { - if (!InlineRemarkAttribute) - return; - - Attribute Attr = Attribute::get(CB.getContext(), "inline-remark", Message); - CB.addAttribute(AttributeList::FunctionIndex, Attr); -} - -/// Return the cost only if the inliner should attempt to inline at the given -/// CallSite. If we return the cost, we will emit an optimisation remark later -/// using that cost, so we won't do so from this function. Return None if -/// inlining should not be attempted. 
-static Optional -shouldInline(CallBase &CB, function_ref GetInlineCost, - OptimizationRemarkEmitter &ORE) { - using namespace ore; - - InlineCost IC = GetInlineCost(CB); - Instruction *Call = &CB; - Function *Callee = CB.getCalledFunction(); - Function *Caller = CB.getCaller(); - - if (IC.isAlways()) { - LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) - << ", Call: " << CB << "\n"); - return IC; - } - - if (!IC) { - LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC) - << ", Call: " << CB << "\n"); - if (IC.isNever()) { - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) - << NV("Callee", Callee) << " not inlined into " - << NV("Caller", Caller) << " because it should never be inlined " - << IC; - }); - } else { - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call) - << NV("Callee", Callee) << " not inlined into " - << NV("Caller", Caller) << " because too costly to inline " - << IC; - }); - } - setInlineRemark(CB, inlineCostStr(IC)); - return None; - } - - int TotalSecondaryCost = 0; - if (shouldBeDeferred(Caller, IC, TotalSecondaryCost, GetInlineCost)) { - LLVM_DEBUG(dbgs() << " NOT Inlining: " << CB - << " Cost = " << IC.getCost() - << ", outer Cost = " << TotalSecondaryCost << '\n'); - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts", - Call) - << "Not inlining. Cost of inlining " << NV("Callee", Callee) - << " increases the cost of inlining " << NV("Caller", Caller) - << " in other contexts"; - }); - setInlineRemark(CB, "deferred"); - // IC does not bool() to false, so get an InlineCost that will. - // This will not be inspected to make an error message. - return None; - } - - LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) << ", Call: " << CB - << '\n'); - return IC; -} - /// Return true if the specified inline history ID /// indicates an inline history that includes the specified function. static bool inlineHistoryIncludes( @@ -523,18 +314,6 @@ return inlineCalls(SCC); } -static void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc &DLoc, - const BasicBlock *Block, const Function &Callee, - const Function &Caller, const InlineCost &IC) { - ORE.emit([&]() { - bool AlwaysInline = IC.isAlways(); - StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined"; - return OptimizationRemark(DEBUG_TYPE, RemarkName, DLoc, Block) - << ore::NV("Callee", &Callee) << " inlined into " - << ore::NV("Caller", &Caller) << " with " << IC; - }); -} - static bool inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, std::function GetAssumptionCache, @@ -886,6 +665,22 @@ } } +InliningAdvisor &InlinerPass::getAdvisor(const ModuleAnalysisManager &MAM, + Module &M) { + auto *IAA = MAM.getCachedResult(M); + if (!IAA) { + // It should still be possible to run the inliner as a stand-alone SCC pass, + // for test scenarios. In that case, we default to the + // DefaultInliningAdvisor, which doesn't need to keep state between SCC pass + // runs. It also uses just the default InlineParams. 
+ OwnedDefaultAdvisor.emplace(getInlineParams()); + return OwnedDefaultAdvisor.getValue(); + } + assert(IAA->getAdvisor() && "Expected a present InliningAdvisorAnalysis also have an " + "InliningAdvisor initialized"); + return *IAA->getAdvisor(); +} + PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR) { @@ -897,6 +692,11 @@ Module &M = *InitialC.begin()->getFunction().getParent(); ProfileSummaryInfo *PSI = MAM.getCachedResult(M); + InliningAdvisor &Advisor = getAdvisor(MAM, M); + Advisor.OnPassEntry(); + + auto AdvisorOnExit = make_scope_exit([&] { Advisor.OnPassExit(); }); + if (!ImportedFunctionsStats && InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) { ImportedFunctionsStats = @@ -1011,29 +811,10 @@ FunctionAnalysisManager &FAM = AM.getResult(*C, CG).getManager(); - // Get the remarks emission analysis for the caller. - auto &ORE = FAM.getResult(F); - std::function GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return FAM.getResult(F); }; - auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & { - return FAM.getResult(F); - }; - auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { - return FAM.getResult(F); - }; - - auto GetInlineCost = [&](CallBase &CB) { - Function &Callee = *CB.getCalledFunction(); - auto &CalleeTTI = FAM.getResult(Callee); - bool RemarksEnabled = - Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled( - DEBUG_TYPE); - return getInlineCost(CB, Params, CalleeTTI, GetAssumptionCache, {GetBFI}, - GetTLI, PSI, RemarksEnabled ? &ORE : nullptr); - }; // Now process as many calls as we have within this caller in the sequnece. // We bail out as soon as the caller has to change so we can update the @@ -1065,101 +846,88 @@ continue; } - auto OIC = shouldInline(*CB, GetInlineCost, ORE); + auto PendingRecord = Advisor.getInliningAdvice(*CB, FAM); // Check whether we want to inline this callsite. - if (!OIC) + if (!PendingRecord->isInliningRecommended()) { + PendingRecord->recordUnattemptedInlining(); continue; - auto DoInline = [&]() -> InlineResult { - // Setup the data structure used to plumb customization into the - // `InlineFunction` routine. - InlineFunctionInfo IFI( - /*cg=*/nullptr, &GetAssumptionCache, PSI, - &FAM.getResult(*(CB->getCaller())), - &FAM.getResult(Callee)); - - InlineResult IR = InlineFunction(*CB, IFI); - if (!IR.isSuccess()) - return IR; - - DidInline = true; - InlinedCallees.insert(&Callee); - ++NumInlined; + } - // Add any new callsites to defined functions to the worklist. - if (!IFI.InlinedCallSites.empty()) { - int NewHistoryID = InlineHistory.size(); - InlineHistory.push_back({&Callee, InlineHistoryID}); - - for (CallBase *ICB : reverse(IFI.InlinedCallSites)) { - Function *NewCallee = ICB->getCalledFunction(); - if (!NewCallee) { - // Try to promote an indirect (virtual) call without waiting for - // the post-inline cleanup and the next DevirtSCCRepeatedPass - // iteration because the next iteration may not happen and we may - // miss inlining it. - if (tryPromoteCall(*ICB)) - NewCallee = ICB->getCalledFunction(); - } - if (NewCallee) - if (!NewCallee->isDeclaration()) - Calls.push_back({ICB, NewHistoryID}); - } - } + // Setup the data structure used to plumb customization into the + // `InlineFunction` routine. 
+ InlineFunctionInfo IFI( + /*cg=*/nullptr, &GetAssumptionCache, PSI, + &FAM.getResult(*(CB->getCaller())), + &FAM.getResult(Callee)); + + InlineResult IR = InlineFunction(*CB, IFI); + if (!IR.isSuccess()) { + PendingRecord->recordUnsuccessfulInlining(IR); + continue; + } - if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) - ImportedFunctionsStats->recordInline(F, Callee); - - // Merge the attributes based on the inlining. - AttributeFuncs::mergeAttributesForInlining(F, Callee); - - // For local functions, check whether this makes the callee trivially - // dead. In that case, we can drop the body of the function eagerly - // which may reduce the number of callers of other functions to one, - // changing inline cost thresholds. - if (Callee.hasLocalLinkage()) { - // To check this we also need to nuke any dead constant uses (perhaps - // made dead by this operation on other functions). - Callee.removeDeadConstantUsers(); - if (Callee.use_empty() && !CG.isLibFunction(Callee)) { - Calls.erase( - std::remove_if(Calls.begin() + I + 1, Calls.end(), - [&](const std::pair &Call) { - return Call.first->getCaller() == &Callee; - }), - Calls.end()); - // Clear the body and queue the function itself for deletion when we - // finish inlining and call graph updates. - // Note that after this point, it is an error to do anything other - // than use the callee's address or delete it. - Callee.dropAllReferences(); - assert(find(DeadFunctions, &Callee) == DeadFunctions.end() && - "Cannot put cause a function to become dead twice!"); - DeadFunctions.push_back(&Callee); + DidInline = true; + InlinedCallees.insert(&Callee); + ++NumInlined; + + // Add any new callsites to defined functions to the worklist. + if (!IFI.InlinedCallSites.empty()) { + int NewHistoryID = InlineHistory.size(); + InlineHistory.push_back({&Callee, InlineHistoryID}); + + for (CallBase *ICB : reverse(IFI.InlinedCallSites)) { + Function *NewCallee = ICB->getCalledFunction(); + if (!NewCallee) { + // Try to promote an indirect (virtual) call without waiting for + // the post-inline cleanup and the next DevirtSCCRepeatedPass + // iteration because the next iteration may not happen and we may + // miss inlining it. + if (tryPromoteCall(*ICB)) + NewCallee = ICB->getCalledFunction(); } + if (NewCallee) + if (!NewCallee->isDeclaration()) + Calls.push_back({ICB, NewHistoryID}); } - return IR; - }; - // Capture the context of CB before inlining, as a successful inlining may - // change that context, and we want to report success or failure in the - // original context. - auto DLoc = CB->getDebugLoc(); - auto *Block = CB->getParent(); - - auto Outcome = DoInline(); - if (!Outcome.isSuccess()) { - using namespace ore; - setInlineRemark(*CB, std::string(Outcome.getFailureReason()) + "; " + - inlineCostStr(*OIC)); - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) - << NV("Callee", &Callee) << " will not be inlined into " - << NV("Caller", &F) << ": " - << NV("Reason", Outcome.getFailureReason()); - }); - continue; } - emitInlinedInto(ORE, DLoc, Block, Callee, F, *OIC); + if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) + ImportedFunctionsStats->recordInline(F, Callee); + + // Merge the attributes based on the inlining. + AttributeFuncs::mergeAttributesForInlining(F, Callee); + + // For local functions, check whether this makes the callee trivially + // dead. 
In that case, we can drop the body of the function eagerly + // which may reduce the number of callers of other functions to one, + // changing inline cost thresholds. + bool CalleeWasDeleted = false; + if (Callee.hasLocalLinkage()) { + // To check this we also need to nuke any dead constant uses (perhaps + // made dead by this operation on other functions). + Callee.removeDeadConstantUsers(); + if (Callee.use_empty() && !CG.isLibFunction(Callee)) { + Calls.erase( + std::remove_if(Calls.begin() + I + 1, Calls.end(), + [&](const std::pair &Call) { + return Call.first->getCaller() == &Callee; + }), + Calls.end()); + // Clear the body and queue the function itself for deletion when we + // finish inlining and call graph updates. + // Note that after this point, it is an error to do anything other + // than use the callee's address or delete it. + Callee.dropAllReferences(); + assert(find(DeadFunctions, &Callee) == DeadFunctions.end() && + "Cannot put cause a function to become dead twice!"); + DeadFunctions.push_back(&Callee); + CalleeWasDeleted = true; + } + } + if (CalleeWasDeleted) + PendingRecord->recordInliningWithCalleeDeleted(); + else + PendingRecord->recordInlining(); } // Back the call index up by one to put us in a good position to go around @@ -1235,7 +1003,7 @@ // sets. for (Function *DeadF : DeadFunctions) { // Get the necessary information out of the call graph and nuke the - // function there. Also, cclear out any cached analyses. + // function there. Also, clear out any cached analyses. auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF)); FunctionAnalysisManager &FAM = AM.getResult(DeadC, CG).getManager(); @@ -1250,7 +1018,15 @@ UR.InvalidatedRefSCCs.insert(&DeadRC); // And delete the actual function from the module. - M.getFunctionList().erase(DeadF); + // The Advisor may use Function pointers to efficiently index various + // internal maps, e.g. for memoization. Function cleanup passes like + // argument promotion create new functions. It is possible for a new + // function to be allocated at the address of a deleted function. We could + // index using names, but that's inefficient. Alternatively, we let the + // Advisor free the functions when it sees fit. + DeadF->getBasicBlockList().clear(); + M.getFunctionList().remove(DeadF); + ++NumDeleted; } @@ -1263,3 +1039,44 @@ PA.preserve(); return PA; } + +ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params, + bool Debugging, + InliningAdvisorMode Mode, + unsigned MaxDevirtIterations) + : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations), + Debugging(Debugging), PM(Debugging) { + // Run the inliner first. The theory is that we are walking bottom-up and so + // the callees have already been fully optimized, and we want to inline them + // into the callers so that our optimizations can reflect that. + // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO + // because it makes profile annotation in the backend inaccurate. + PM.addPass(InlinerPass()); +} + +PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M, + ModuleAnalysisManager &MAM) { + auto &IAA = MAM.getResult(M); + if (!IAA.tryCreate(Params, Mode)) { + M.getContext().emitError( + "Could not setup Inlining Advisor for the requested " + "mode and/or options"); + return PreservedAnalyses::all(); + } + + ModulePassManager MPM(Debugging); + // If devirtualization iterations are requested, we wrap the CGSCC pipeline in + // a devirtualization repeater. 
This will try to detect when we devirtualize
+  // indirect calls and iterate the SCC passes in that case to try and catch
+  // knock-on inlining or function attrs opportunities. Then we add it to the
+  // module pipeline by walking the SCCs in postorder (or bottom-up).
+  if (MaxDevirtIterations > 0)
+    MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+        createDevirtSCCRepeatedPass(std::move(PM), MaxDevirtIterations)));
+  else
+    MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(PM)));
+  auto Ret = MPM.run(M, MAM);
+
+  IAA.clear();
+  return Ret;
+}
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -132,7 +132,8 @@
 ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
 ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
-; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
+; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
+; CHECK-O-NEXT: Running analysis: InliningAdvisorAnalysis
 ; CHECK-O-NEXT: Starting llvm::Module pass manager run.
 ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -73,7 +73,14 @@
 ; CHECK-O2-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass
 ; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
-; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}InlinerPass>
+; CHECK-O2-NEXT: Running pass: ModuleInlinerWrapperPass
+; CHECK-O2-NEXT: Running analysis: InliningAdvisorAnalysis
+; CHECK-O2-NEXT: Starting llvm::Module pass manager run.
+; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
+; CHECK-O2-NEXT: Starting CGSCC pass manager run.
+; CHECK-O2-NEXT: Running pass: InlinerPass
+; CHECK-O2-NEXT: Finished CGSCC pass manager run.
+; CHECK-O2-NEXT: Finished llvm::Module pass manager run.
 ; CHECK-O2-NEXT: Running pass: GlobalOptPass
 ; CHECK-O2-NEXT: Running pass: GlobalDCEPass
 ; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll
--- a/llvm/test/Other/new-pm-thinlto-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-defaults.ll
@@ -97,7 +97,8 @@
 ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
 ; CHECK-PRELINK-O-NEXT: Running analysis: ProfileSummaryAnalysis
-; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
+; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
+; CHECK-O-NEXT: Running analysis: InliningAdvisorAnalysis
 ; CHECK-O-NEXT: Starting llvm::Module pass manager run.
 ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -70,7 +70,8 @@
 ; CHECK-O-NEXT: Running analysis: GlobalsAA
 ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
-; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
+; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
+; CHECK-O-NEXT: Running analysis: InliningAdvisorAnalysis
 ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
 ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -78,7 +78,8 @@
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
 ; CHECK-O-NEXT: Running analysis: GlobalsAA
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
-; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
+; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
+; CHECK-O-NEXT: Running analysis: InliningAdvisorAnalysis
 ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
 ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
--- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -65,6 +65,9 @@
 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run.
+; CHECK-O123-NEXT: Running pass: ModuleInlinerWrapperPass
+; CHECK-O123-NEXT: Running analysis: InliningAdvisorAnalysis
+; CHECK-O123-NEXT: Starting llvm::Module pass manager run.
 ; CHECK-O123-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PassManager<{{.*}}LazyCallGraph::SCC
 ; CHECK-O123-NEXT: Running analysis: InnerAnalysisManagerProxy
 ; CHECK-O123-NEXT: Running analysis: LazyCallGraphAnalysis
@@ -75,6 +78,7 @@
 ; CHECK-O123-NEXT: Running pass: InlinerPass on (foo)
 ; CHECK-O123-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
 ; CHECK-O123-NEXT: Finished CGSCC pass manager run.
+; CHECK-O123-NEXT: Finished {{.*}}Module pass manager run.
 ; CHECK-O123-NEXT: Running pass: GlobalDCEPass
 ; CHECK-O-NEXT: Running pass: PGOInstrumentationUse
 ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
@@ -97,7 +101,9 @@
 ; CHECK-O-NEXT: Running analysis: GlobalsAA
 ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
-; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
+; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
+; CHECK-Os-NEXT: Running analysis: InliningAdvisorAnalysis
+; CHECK-Oz-NEXT: Running analysis: InliningAdvisorAnalysis
 ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
 ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
--- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
@@ -78,7 +78,8 @@
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
 ; CHECK-O-NEXT: Running analysis: GlobalsAA
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
-; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}>
+; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
+; CHECK-O-NEXT: Running analysis: InliningAdvisorAnalysis
 ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run.
 ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}>
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
diff --git a/llvm/test/Other/scc-deleted-printer.ll b/llvm/test/Other/scc-deleted-printer.ll
--- a/llvm/test/Other/scc-deleted-printer.ll
+++ b/llvm/test/Other/scc-deleted-printer.ll
@@ -3,6 +3,11 @@
 ; RUN: opt < %s 2>&1 -disable-output \
 ; RUN: -passes=inline -print-before-all -print-after-all -print-module-scope | FileCheck %s -check-prefix=INL-MOD
 
+; RUN: opt < %s 2>&1 -disable-output \
+; RUN: -passes=inliner-wrapper -print-before-all -print-after-all | FileCheck %s -check-prefix=INL
+; RUN: opt < %s 2>&1 -disable-output \
+; RUN: -passes=inliner-wrapper -print-before-all -print-after-all -print-module-scope | FileCheck %s -check-prefix=INL-MOD
+
 ; INL: IR Dump Before {{InlinerPass .*scc: .tester, foo}}
 ; INL-NOT: IR Dump After {{InlinerPass}}
 ; INL: IR Dump Before {{InlinerPass .*scc: .tester}}
diff --git a/llvm/test/Other/scc-pass-printer.ll b/llvm/test/Other/scc-pass-printer.ll
--- a/llvm/test/Other/scc-pass-printer.ll
+++ b/llvm/test/Other/scc-pass-printer.ll
@@ -3,9 +3,13 @@
 ; RUN: opt < %s 2>&1 -disable-output \
 ; RUN: -passes=inline -print-after-all | FileCheck %s -check-prefix=INL
 ; RUN: opt < %s 2>&1 -disable-output \
+; RUN: -passes=inliner-wrapper -print-after-all | FileCheck %s -check-prefix=INL
+; RUN: opt < %s 2>&1 -disable-output \
 ; RUN: -inline -print-after-all -print-module-scope | FileCheck %s -check-prefix=INL-MOD
 ; RUN: opt < %s 2>&1 -disable-output \
 ; RUN: -passes=inline -print-after-all -print-module-scope | FileCheck %s -check-prefix=INL-MOD
+; RUN: opt < %s 2>&1 -disable-output \
+; RUN: -passes=inliner-wrapper -print-after-all -print-module-scope | FileCheck %s -check-prefix=INL-MOD
 
 ; INL: IR Dump After {{Function Integration/Inlining|InlinerPass .*scc: .bar, foo}}
 ; INL: define void @bar()
diff --git a/llvm/test/Transforms/Inline/inline_stats.ll b/llvm/test/Transforms/Inline/inline_stats.ll
--- a/llvm/test/Transforms/Inline/inline_stats.ll
+++ b/llvm/test/Transforms/Inline/inline_stats.ll
@@ -6,6 +6,9 @@
 ; RUN: opt -S -passes=inline -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=CHECK-BASIC -check-prefix=CHECK
 ; RUN: opt -S -passes=inline -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix="CHECK-VERBOSE" -check-prefix=CHECK
 
+; RUN: opt -S -passes=inliner-wrapper -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=CHECK-BASIC -check-prefix=CHECK
+; RUN: opt -S -passes=inliner-wrapper -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix="CHECK-VERBOSE" -check-prefix=CHECK
+
 ; CHECK: ------- Dumping inliner stats for [] -------
 ; CHECK-BASIC-NOT: -- List of inlined functions:
 ; CHECK-BASIC-NOT: -- Inlined not imported function
diff --git a/llvm/test/Transforms/Inline/inlining-advisor-default.ll b/llvm/test/Transforms/Inline/inlining-advisor-default.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inlining-advisor-default.ll
@@ -0,0 +1,9 @@
+; Check that, in the absence of dependencies, we emit an error message when
+; trying to use ML-driven inlining.
+;
+; RUN: not opt -passes=scc-oz-module-inliner -enable-ml-inliner=development -S < %s 2>&1 | FileCheck %s
+; RUN: not opt -passes=scc-oz-module-inliner -enable-ml-inliner=release -S < %s 2>&1 | FileCheck %s
+
+declare i64 @f1()
+
+; CHECK: Could not setup Inlining Advisor for the requested mode and/or options
\ No newline at end of file
diff --git a/llvm/test/Transforms/Inline/internal-scc-members.ll b/llvm/test/Transforms/Inline/internal-scc-members.ll
--- a/llvm/test/Transforms/Inline/internal-scc-members.ll
+++ b/llvm/test/Transforms/Inline/internal-scc-members.ll
@@ -3,6 +3,7 @@
 ;
 ; RUN: opt < %s -S -inline | FileCheck %s
 ; RUN: opt < %s -S -passes=inline | FileCheck %s
+; RUN: opt < %s -S -passes=inliner-wrapper | FileCheck %s
 
 ; CHECK-LABEL: define internal void @test1_scc0()
 ; CHECK-NOT: call
diff --git a/llvm/test/Transforms/Inline/module-inlining.ll b/llvm/test/Transforms/Inline/module-inlining.ll
--- a/llvm/test/Transforms/Inline/module-inlining.ll
+++ b/llvm/test/Transforms/Inline/module-inlining.ll
@@ -6,6 +6,7 @@
 ; a 'ret 10'
 ;
 ; RUN: opt -passes=inline -S < %s | FileCheck %s --check-prefix=INLINE --check-prefix=CHECK
+; RUN: opt -passes=inliner-wrapper -S < %s | FileCheck %s --check-prefix=INLINE --check-prefix=CHECK
 ; RUN: opt -passes=scc-oz-module-inliner -S < %s | FileCheck %s --check-prefix=MODULE --check-prefix=CHECK
 
 define void @modify_value({i32, float}* %v) {
diff --git a/llvm/test/Transforms/Inline/monster_scc.ll b/llvm/test/Transforms/Inline/monster_scc.ll
--- a/llvm/test/Transforms/Inline/monster_scc.ll
+++ b/llvm/test/Transforms/Inline/monster_scc.ll
@@ -41,6 +41,7 @@
 ;
 ; RUN: opt -S < %s -inline -inline-threshold=150 | FileCheck %s --check-prefixes=CHECK,OLD
 ; RUN: opt -S < %s -passes=inline -inline-threshold=150 | FileCheck %s --check-prefixes=CHECK,NEW
+; RUN: opt -S < %s -passes=inliner-wrapper -inline-threshold=150 | FileCheck %s --check-prefixes=CHECK,NEW
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
diff --git a/llvm/test/Transforms/Inline/optimization-remarks-hotness-threshold.ll b/llvm/test/Transforms/Inline/optimization-remarks-hotness-threshold.ll
--- a/llvm/test/Transforms/Inline/optimization-remarks-hotness-threshold.ll
+++ b/llvm/test/Transforms/Inline/optimization-remarks-hotness-threshold.ll
@@ -5,6 +5,10 @@
 ; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold=1 2>&1 | \
 ; RUN: FileCheck -allow-empty -check-prefix=THRESHOLD %s
 
+; RUN: opt < %s -S -passes=inliner-wrapper -pass-remarks-output=%t -pass-remarks=inline \
+; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold=1 2>&1 | \
+; RUN: FileCheck -allow-empty -check-prefix=THRESHOLD %s
+
 ; Check that when any threshold is specified we ignore remarks with no
 ; hotness -- these are blocks that have not been executed during training.
 
diff --git a/llvm/test/Transforms/Inline/optimization-remarks-passed-yaml.ll b/llvm/test/Transforms/Inline/optimization-remarks-passed-yaml.ll
--- a/llvm/test/Transforms/Inline/optimization-remarks-passed-yaml.ll
+++ b/llvm/test/Transforms/Inline/optimization-remarks-passed-yaml.ll
@@ -8,6 +8,11 @@
 ; RUN: -pass-remarks-with-hotness 2>&1 | FileCheck %s
 ; RUN: cat %t | FileCheck -check-prefix=YAML %s
 
+; RUN: opt < %s -S -passes=inliner-wrapper -pass-remarks-output=%t -pass-remarks=inline \
+; RUN: -pass-remarks-missed=inline -pass-remarks-analysis=inline \
+; RUN: -pass-remarks-with-hotness 2>&1 | FileCheck %s
+; RUN: cat %t | FileCheck -check-prefix=YAML %s
+
 ; Check the YAML file for inliner-generated passed and analysis remarks. This
 ; is the input:
 
diff --git a/llvm/test/Transforms/Inline/optimization-remarks-with-hotness.ll b/llvm/test/Transforms/Inline/optimization-remarks-with-hotness.ll
--- a/llvm/test/Transforms/Inline/optimization-remarks-with-hotness.ll
+++ b/llvm/test/Transforms/Inline/optimization-remarks-with-hotness.ll
@@ -4,6 +4,9 @@
 ; RUN: opt < %s -passes=inline -pass-remarks=inline -pass-remarks-missed=inline \
 ; RUN: -pass-remarks-analysis=inline -pass-remarks-with-hotness -S 2>&1 \
 ; RUN: | FileCheck %s
+; RUN: opt < %s -passes=inliner-wrapper -pass-remarks=inline -pass-remarks-missed=inline \
+; RUN: -pass-remarks-analysis=inline -pass-remarks-with-hotness -S 2>&1 \
+; RUN: | FileCheck %s
 
 ; CHECK: foo inlined into bar with (cost=always): always inline attribute (hotness: 30)
 ; CHECK: foz not inlined into bar because it should never be inlined (cost=never): noinline function attribute (hotness: 30)
diff --git a/llvm/test/Transforms/Inline/optimization-remarks-yaml.ll b/llvm/test/Transforms/Inline/optimization-remarks-yaml.ll
--- a/llvm/test/Transforms/Inline/optimization-remarks-yaml.ll
+++ b/llvm/test/Transforms/Inline/optimization-remarks-yaml.ll
@@ -34,6 +34,25 @@
 ; RUN: opt < %s -S -passes=inline \
 ; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold 100 \
 ; RUN: -pass-remarks-output=%t.threshold
+
+; Inliner - Module Wrapper
+; RUN: opt < %s -S -passes=inliner-wrapper -pass-remarks-missed=inline \
+; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold 15 \
+; RUN: -pass-remarks-output=%t 2>&1 | FileCheck %s
+; RUN: cat %t | FileCheck -check-prefix=YAML %s
+; RUN: opt < %s -S -passes=inliner-wrapper -pass-remarks-with-hotness -pass-remarks-output=%t
+; RUN: cat %t | FileCheck -check-prefix=YAML %s
+;
+; Verify that remarks that don't meet the hotness threshold are not output.
+; RUN: opt < %s -S -passes=inliner-wrapper -pass-remarks-missed=inline \
+; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold 100 \
+; RUN: -pass-remarks-output=%t.threshold 2>&1 | \
+; RUN: FileCheck -check-prefix=THRESHOLD %s
+; RUN: test ! -s %t.threshold
+; RUN: opt < %s -S -passes=inliner-wrapper \
+; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold 100 \
+; RUN: -pass-remarks-output=%t.threshold
+
 ; The remarks output file should be empty.
 ; RUN: test ! -s %t.threshold
 
diff --git a/llvm/test/Transforms/Inline/optimization-remarks.ll b/llvm/test/Transforms/Inline/optimization-remarks.ll
--- a/llvm/test/Transforms/Inline/optimization-remarks.ll
+++ b/llvm/test/Transforms/Inline/optimization-remarks.ll
@@ -12,6 +12,13 @@
 ; RUN: -pass-remarks-analysis=inline -pass-remarks-with-hotness -S 2>&1 | \
 ; RUN: FileCheck -check-prefix=CHECK -check-prefix=HOTNESS %s
 
+; RUN: opt < %s -passes=inliner-wrapper -pass-remarks=inline -pass-remarks-missed=inline \
+; RUN: -pass-remarks-analysis=inline -S 2>&1 | \
+; RUN: FileCheck -check-prefix=CHECK -check-prefix=NO_HOTNESS %s
+; RUN: opt < %s -passes=inliner-wrapper -pass-remarks=inline -pass-remarks-missed=inline \
+; RUN: -pass-remarks-analysis=inline -pass-remarks-with-hotness -S 2>&1 | \
+; RUN: FileCheck -check-prefix=CHECK -check-prefix=HOTNESS %s
+
 ; HOTNESS: fox will not be inlined into bar because its definition is unavailable
 ; NO_HOTNESS-NOT: fox will not be inlined into bar because its definition is unavailable
 ; CHECK: foo inlined into bar with (cost=always): always inline attribute
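
Note for reviewers: the `inliner-wrapper` pipeline exercised by the RUN lines above is the CGSCC inliner wrapped into a module pass, built the same way as the pipeline-construction hunk at the top of this section. Below is a minimal standalone sketch of that wrapping, not the patch's actual ModuleInlinerWrapperPass implementation; it reuses only the existing createModuleToPostOrderCGSCCPassAdaptor, createDevirtSCCRepeatedPass, and InlinerPass APIs, and the helper name buildWrappedInlinerPipeline is hypothetical.

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/IPO/Inliner.h"

using namespace llvm;

// Hypothetical helper, for illustration only: wrap a CGSCC pipeline containing
// the inliner into a module pass manager, optionally repeating the SCC passes
// when devirtualization is detected (mirroring the hunk above).
static ModulePassManager
buildWrappedInlinerPipeline(unsigned MaxDevirtIterations) {
  CGSCCPassManager PM;
  PM.addPass(InlinerPass()); // the CGSCC inliner itself

  ModulePassManager MPM;
  if (MaxDevirtIterations > 0)
    MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
        createDevirtSCCRepeatedPass(std::move(PM), MaxDevirtIterations)));
  else
    MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(PM)));
  return MPM;
}

This wrapping is why the updated FileCheck expectations above now see ModuleInlinerWrapperPass followed by a nested module and CGSCC pass-manager run around InlinerPass.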