diff --git a/llvm/include/llvm/Analysis/ML/InliningAdvisor.h b/llvm/include/llvm/Analysis/ML/InliningAdvisor.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Analysis/ML/InliningAdvisor.h @@ -0,0 +1,96 @@ +//===- InliningAdvisor.h - ML infrastructure for inliner --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_ML_INLINERML_H_ +#define LLVM_ML_INLINERML_H_ + +#include +#include + +#include "llvm/IR/PassManager.h" + +namespace llvm { +class CallBase; +class Function; +class Module; +class PreservedAnalyses; + +enum class MLMode : int { Invalid, Rel, Dev }; + +class PendingInliningRecord { +public: + PendingInliningRecord(PendingInliningRecord &&) = delete; + PendingInliningRecord(const PendingInliningRecord &) = delete; + + virtual void recordInlining(bool CalleeWasDeleted, bool SiteWasInlined) = 0; + virtual ~PendingInliningRecord() = default; + +protected: + PendingInliningRecord() = default; +}; + +class InliningAdvisor { +public: + InliningAdvisor(InliningAdvisor &&) = delete; + virtual ~InliningAdvisor() = default; + + virtual std::unique_ptr + shouldInline(CallBase *CB, bool &AlternativeRecommendation, bool Mandatory, + int CostEstimate) = 0; + + virtual void OnPassEntry() = 0; + virtual void OnPassExit() = 0; + virtual void OnSuccessfulInlining(const Function *F) = 0; + + virtual void OnAllInliningCompleted() = 0; + virtual void OnFunctionDeleted(Function *F) = 0; + + static std::unique_ptr + create(Module &, ModuleAnalysisManager &, MLMode Mode); + +protected: + InliningAdvisor() = default; +}; + +class InliningAdvisorAnalysis + : public AnalysisInfoMixin { +public: + InliningAdvisorAnalysis(MLMode Mode) : Mode(Mode) {} + struct Result { + Result(std::unique_ptr
Adv) : Advisor(std::move(Adv)) { + assert(Advisor && "Creating an InliningAdvisorAnalysis without a valid " + "InliningAdvisor is not supported"); + } + bool invalidate(Module &, const PreservedAnalyses &, + ModuleAnalysisManager::Invalidator &) { + // InliningAdvisor must be preserved across analysis invalidations. + return false; + } + InliningAdvisor *get() const { return Advisor.get(); } + + private: + std::unique_ptr Advisor; + }; + + Result run(Module &M, ModuleAnalysisManager &MAM); + static AnalysisKey Key; + +private: + static bool isModeSupported(MLMode Mode); + MLMode Mode = MLMode::Invalid; +}; + +class InliningAdvisorCleanupPass + : public PassInfoMixin { +public: + InliningAdvisorCleanupPass() = default; + PreservedAnalyses run(Module &, ModuleAnalysisManager &); +}; + +} // namespace llvm +#endif // LLVM_ML_INLINERML_H_ diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -40,6 +40,7 @@ IVUsers.cpp IndirectCallPromotionAnalysis.cpp InlineCost.cpp + InliningAdvisorAnalysis.cpp InstCount.cpp InstructionPrecedenceTracking.cpp InstructionSimplify.cpp diff --git a/llvm/lib/Analysis/InliningAdvisorAnalysis.cpp b/llvm/lib/Analysis/InliningAdvisorAnalysis.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Analysis/InliningAdvisorAnalysis.cpp @@ -0,0 +1,52 @@ +//===- InliningAdvisorAnalysis.cpp - noop implementation ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements InliningAdvisor APIs for the case we do not have any of +// the dependencies necessary for ML policies.
It also implements the simple +// logic of InliningAdvisorAnalysis and InliningAdvisorCleanupPass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ML/InliningAdvisor.h" + +namespace llvm { + +std::unique_ptr +createAdvisor(Module &M, ModuleAnalysisManager &MAM, MLMode Mode) { + switch (Mode) { + case MLMode::Invalid: + llvm_unreachable("The MLMode::Invalid case should have been handled in the " + "pass manager"); + return nullptr; + case MLMode::Dev: + case MLMode::Rel: +#if LLVM_HAVE_TF_API || LLVM_HAVE_TF_AOT + return InliningAdvisor::create(M, MAM, Mode); +#endif + break; + } + M.getContext().emitError( + "Could not setup Inlining Advisor for the requested mode and/or options"); + return nullptr; +} + +AnalysisKey InliningAdvisorAnalysis::Key; +InliningAdvisorAnalysis::Result +InliningAdvisorAnalysis::run(Module &M, ModuleAnalysisManager &MAM) { + return Result(createAdvisor(M, MAM, Mode)); +} + +PreservedAnalyses InliningAdvisorCleanupPass::run(Module &M, + ModuleAnalysisManager &MAM) { + auto *Advisor = MAM.getCachedResult(M); + if (Advisor) + Advisor->get()->OnAllInliningCompleted(); + return PreservedAnalyses::all(); +} +} // namespace llvm diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -39,6 +39,7 @@ #include "llvm/Analysis/LoopCacheAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopNestAnalysis.h" +#include "llvm/Analysis/ML/InliningAdvisor.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" @@ -206,15 +207,29 @@ cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)")); -static cl::opt - RunNewGVN("enable-npm-newgvn", cl::init(false), - cl::Hidden, cl::ZeroOrMore, - cl::desc("Run NewGVN instead of GVN")); +static cl::opt RunNewGVN("enable-npm-newgvn", cl::init(false), cl::Hidden,
cl::ZeroOrMore, + cl::desc("Run NewGVN instead of GVN")); static cl::opt EnableGVNHoist( "enable-npm-gvn-hoist", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); +static cl::opt EnableMLInliner( + "enable-ml-inliner", cl::init(MLMode::Invalid), cl::Hidden, + cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), + cl::values(clEnumValN(MLMode::Invalid, "disabled", + "Heuristics-based inliner version."), + clEnumValN(MLMode::Dev, "dev", + "Use development mode (runtime-loadable model)."), + clEnumValN(MLMode::Rel, "rel", + "Use release mode (AOT-compiled model)."))); + +cl::opt PerformMandatoryInliningsFirst( + "mandatory-inlinings-first", cl::init(false), + cl::desc("Perform all mandatory (always-inline) inlinings first, for the " + "whole module.")); + static cl::opt EnableGVNSink( "enable-npm-gvn-sink", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); @@ -412,10 +427,8 @@ C(LAM); } -FunctionPassManager -PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, - ThinLTOPhase Phase, - bool DebugLogging) { +FunctionPassManager PassBuilder::buildFunctionSimplificationPipeline( + OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) { assert(Level != OptimizationLevel::O0 && "Must request optimizations!"); FunctionPassManager FPM(DebugLogging); @@ -442,7 +455,8 @@ if (Level.getSpeedupLevel() > 1) { FPM.addPass(SpeculativeExecutionPass()); - // Optimize based on known information about branches, and cleanup afterward. + // Optimize based on known information about branches, and cleanup + // afterward. FPM.addPass(JumpThreadingPass()); FPM.addPass(CorrelatedValuePropagationPass()); } @@ -516,7 +530,8 @@ // We provide the opt remark emitter pass for LICM to use. We only need to do // this once as it is immutable. 
- FPM.addPass(RequireAnalysisPass()); + FPM.addPass( + RequireAnalysisPass()); FPM.addPass(createFunctionToLoopPassAdaptor( std::move(LPM1), EnableMSSALoopDependency, DebugLogging)); FPM.addPass(SimplifyCFGPass()); @@ -715,8 +730,12 @@ if (Phase == ThinLTOPhase::PreLink && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) IP.HotCallSiteThreshold = 0; - MainCGPipeline.addPass(InlinerPass(IP)); + if (EnableMLInliner != MLMode::Invalid) { + MPM.addPass(RequireAnalysisPass()); + } + + MainCGPipeline.addPass(InlinerPass(IP)); if (AttributorRun & AttributorRunOption::CGSCC) MainCGPipeline.addPass(AttributorCGSCCPass()); @@ -752,6 +771,8 @@ MPM.addPass( createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass( std::move(MainCGPipeline), MaxDevirtIterations))); + if (EnableMLInliner != MLMode::Invalid) + MPM.addPass(InliningAdvisorCleanupPass()); return MPM; } @@ -1006,11 +1027,11 @@ // convert to more optimized IR using more aggressive simplify CFG options. // The extra sinking transform can create larger basic blocks, so do this // before SLP vectorization. - OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions(). - forwardSwitchCondToPhi(true). - convertSwitchToLookupTable(true). - needCanonicalLoops(false). - sinkCommonInsts(true))); + OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions() + .forwardSwitchCondToPhi(true) + .convertSwitchToLookupTable(true) + .needCanonicalLoops(false) + .sinkCommonInsts(true))); // Optimize parallel scalar instruction chains into SIMD instructions. 
if (PTO.SLPVectorization) @@ -1033,7 +1054,8 @@ PTO.ForgetAllSCEVInLoopUnroll))); OptimizePM.addPass(WarnMissedTransformationsPass()); OptimizePM.addPass(InstCombinePass()); - OptimizePM.addPass(RequireAnalysisPass()); + OptimizePM.addPass( + RequireAnalysisPass()); OptimizePM.addPass(createFunctionToLoopPassAdaptor( LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), EnableMSSALoopDependency, DebugLogging)); @@ -1274,8 +1296,8 @@ } // Now deduce any function attributes based in the current code. - MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( - PostOrderFunctionAttrsPass())); + MPM.addPass( + createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass())); // Do RPO function attribute inference across the module to forward-propagate // attributes where applicable. @@ -1367,8 +1389,8 @@ // Run a few AA driver optimizations here and now to cleanup the code. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( - PostOrderFunctionAttrsPass())); + MPM.addPass( + createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass())); // FIXME: here we run IP alias analysis in the legacy PM. 
FunctionPassManager MainFPM; @@ -1628,7 +1650,8 @@ return make_error( formatv("invalid argument to SimplifyCFG pass bonus-threshold " "parameter: '{0}' ", - ParamName).str(), + ParamName) + .str(), inconvertibleErrorCode()); Result.bonusInstThreshold(BonusInstThreshold.getSExtValue()); } else { diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -27,6 +27,7 @@ MODULE_ANALYSIS("verify", VerifierAnalysis()) MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) MODULE_ANALYSIS("asan-globals-md", ASanGlobalsMetadataAnalysis()) +MODULE_ANALYSIS("inlining-advisor", InliningAdvisorAnalysis(EnableMLInliner)) #ifndef MODULE_ALIAS_ANALYSIS #define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ @@ -65,6 +66,8 @@ MODULE_PASS("ipsccp", IPSCCPPass()) MODULE_PASS("lowertypetests", LowerTypeTestsPass(nullptr, nullptr)) MODULE_PASS("mergefunc", MergeFunctionsPass()) +MODULE_PASS("scc-oz-module-inliner", + buildInlinerPipeline(OptimizationLevel::Oz, ThinLTOPhase::None, DebugLogging)) MODULE_PASS("name-anon-globals", NameAnonGlobalPass()) MODULE_PASS("no-op-module", NoOpModulePass()) MODULE_PASS("partial-inliner", PartialInlinerPass()) diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -30,12 +30,11 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/Analysis/ML/InliningAdvisor.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" 
@@ -57,8 +56,10 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include #include @@ -158,9 +159,9 @@ /// *actually make it to the backend*, which is really what we want. /// /// Because we don't have this information, we do this simple and useful hack. -static void mergeInlinedArrayAllocas( - Function *Caller, InlineFunctionInfo &IFI, - InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory) { +static void mergeInlinedArrayAllocas(Function *Caller, InlineFunctionInfo &IFI, + InlinedArrayAllocasTy &InlinedArrayAllocas, + int InlineHistory) { SmallPtrSet UsedAllocas; // When processing our SCC, check to see if CS was inlined from some other @@ -890,6 +891,24 @@ assert(InitialC.size() > 0 && "Cannot handle an empty SCC!"); Module &M = *InitialC.begin()->getFunction().getParent(); ProfileSummaryInfo *PSI = MAM.getCachedResult(M); + auto *IAA = MAM.getCachedResult(M); + InliningAdvisor *Advisor = IAA ? IAA->get() : nullptr; + if (Advisor) + Advisor->OnPassEntry(); + + // Avoid subtle bugs due to alternative exits from this method - if we have + // an advisor, ensure it is always informed when we're done with a scc. + class AdvisorExitCapture final { + InliningAdvisor *const Advisor; + + public: + AdvisorExitCapture(InliningAdvisor *A) : Advisor(A) {} + ~AdvisorExitCapture() { + if (Advisor) + Advisor->OnPassExit(); + } + }; + AdvisorExitCapture Capturer(Advisor); if (!ImportedFunctionsStats && InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) { @@ -1003,8 +1022,7 @@ // node however because those functions aren't going to be mutated by this // pass. 
FunctionAnalysisManager &FAM = - AM.getResult(*C, CG) - .getManager(); + AM.getResult(*C, CG).getManager(); // Get the remarks emission analysis for the caller. auto &ORE = FAM.getResult(F); @@ -1060,17 +1078,54 @@ continue; } - Optional OIC = shouldInline(*CS, GetInlineCost, ORE); - // Check whether we want to inline this callsite. - if (!OIC.hasValue()) { - setInlineRemark(*CS, "deferred"); + auto TrivialDecision = llvm::getAttributeBasedInliningDecision( + *CS, CS->getCalledFunction(), FAM.getResult(Callee), + GetTLI); + + if (Advisor && + ((TrivialDecision.hasValue() && !TrivialDecision->isSuccess()) || + &Callee == &F)) continue; - } - if (!OIC.getValue()) { - // shouldInline() call returned a negative inline cost that explains - // why this callsite should not be inlined. - setInlineRemark(*CS, inlineCostStr(*OIC)); + // TODO(mtrofin): this replicates the already calculated + // TrivialDecision part when we don't do inference. Refactor to avoid. + const bool Mandatory = + TrivialDecision.hasValue() && TrivialDecision->isSuccess(); + + // TODO(mtrofin): no need to compute OIC if Advisor is doing inference and + // no logging. + Optional OIC = shouldInline(*CS, GetInlineCost, ORE); + assert(!Mandatory || (OIC.hasValue() && OIC.getValue())); + bool ShouldInline = Mandatory || (OIC.hasValue() && OIC.getValue()); + // A deep analysis of the callsite may reveal blocking reasons for not + // inlining, such as VarArgs, or large stack sizes. Stop in that case, as + // inlining would cause a correctness problem. 
+ int CostEstimate = 0; + // If the inlining is mandatory, we won't use the cost, so can set it to 0 + if (!Mandatory) { + auto IsCallsiteInlinable = llvm::getInliningCostEstimate( + *CS, FAM.getResult(Callee), GetAssumptionCache, + {}, nullptr, nullptr); + if (!IsCallsiteInlinable) + continue; + CostEstimate = IsCallsiteInlinable.getValue(); + } + std::unique_ptr PendingRecord; + if (Advisor) { + PendingRecord = + Advisor->shouldInline(CS, ShouldInline, Mandatory, CostEstimate); + } + if (!ShouldInline) { + // Check whether we want to inline this callsite. + if (!OIC.hasValue()) { + setInlineRemark(*CS, "deferred"); + } else if (!OIC.getValue()) { + // shouldInline() call returned a negative inline cost that explains + // why this callsite should not be inlined. + setInlineRemark(*CS, inlineCostStr(*OIC)); + } + if (PendingRecord) + PendingRecord->recordInlining(false, false); continue; } @@ -1089,14 +1144,17 @@ InlineResult IR = InlineFunction(*CS, IFI); if (!IR.isSuccess()) { - setInlineRemark(*CS, std::string(IR.getFailureReason()) + "; " + - inlineCostStr(*OIC)); + setInlineRemark( + *CS, std::string(IR.getFailureReason()) + "; " + + (OIC.hasValue() ? inlineCostStr(*OIC) : "ML Advisor")); ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) << NV("Callee", &Callee) << " will not be inlined into " << NV("Caller", &F) << ": " << NV("Reason", IR.getFailureReason()); }); + if (PendingRecord) + PendingRecord->recordInlining(false, false); continue; } DidInline = true; @@ -1104,7 +1162,10 @@ ++NumInlined; - emitInlinedInto(ORE, DLoc, Block, Callee, F, *OIC); + // TODO(mtrofin): OIC may have no value if the Advisor chose to inline a + // callsite the heuristic deferred. We should still emit a remark. + if (OIC.hasValue()) + emitInlinedInto(ORE, DLoc, Block, Callee, F, *OIC); // Add any new callsites to defined functions to the worklist. if (!IFI.InlinedCallSites.empty()) { @@ -1138,6 +1199,7 @@ // dead.
In that case, we can drop the body of the function eagerly // which may reduce the number of callers of other functions to one, // changing inline cost thresholds. + bool CalleeWasDeleted = false; if (Callee.hasLocalLinkage()) { // To check this we also need to nuke any dead constant uses (perhaps // made dead by this operation on other functions). @@ -1157,8 +1219,13 @@ assert(find(DeadFunctions, &Callee) == DeadFunctions.end() && "Cannot put cause a function to become dead twice!"); DeadFunctions.push_back(&Callee); + CalleeWasDeleted = true; + if (Advisor) + Advisor->OnFunctionDeleted(&Callee); } } + if (PendingRecord) + PendingRecord->recordInlining(CalleeWasDeleted, true); } // Back the call index up by one to put us in a good position to go around @@ -1237,8 +1304,7 @@ // function there. Also, cclear out any cached analyses. auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF)); FunctionAnalysisManager &FAM = - AM.getResult(DeadC, CG) - .getManager(); + AM.getResult(DeadC, CG).getManager(); FAM.clear(*DeadF, DeadF->getName()); AM.clear(DeadC, DeadC.getName()); auto &DeadRC = DeadC.getOuterRefSCC(); @@ -1250,7 +1316,19 @@ UR.InvalidatedRefSCCs.insert(&DeadRC); // And delete the actual function from the module. - M.getFunctionList().erase(DeadF); + // If we use the Advisor, it uses Function pointers to index various + // maps, e.g. memoization. Function cleanup passes like argument promotion + // create new functions. It is possible for a new function to be allocated + // at the address of a deleted function. + // We could index using names, but that's inefficient. Alternatively, + // we let the Advisor free the functions. 
+ if (Advisor) { + DeadF->getBasicBlockList().clear(); + M.getFunctionList().remove(DeadF); + } else { + M.getFunctionList().erase(DeadF); + } + ++NumDeleted; } diff --git a/llvm/test/Transforms/Inline/inlining-advisor-default.ll b/llvm/test/Transforms/Inline/inlining-advisor-default.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Inline/inlining-advisor-default.ll @@ -0,0 +1,9 @@ +; Check that, in the absence of dependencies, we emit an error message when +; trying to use ML-driven inlining. +; +; RUN: not opt -passes=scc-oz-module-inliner -enable-ml-inliner=dev -S < %s 2>&1 | FileCheck %s +; RUN: not opt -passes=scc-oz-module-inliner -enable-ml-inliner=rel -S < %s 2>&1 | FileCheck %s + +declare i64 @f1() + +; CHECK: Could not setup Inlining Advisor for the requested mode and/or options \ No newline at end of file