diff --git a/clang/test/Frontend/optimization-remark-line-directive.c b/clang/test/Frontend/optimization-remark-line-directive.c --- a/clang/test/Frontend/optimization-remark-line-directive.c +++ b/clang/test/Frontend/optimization-remark-line-directive.c @@ -2,11 +2,11 @@ // directives. We cannot map #line directives back to // a SourceLocation. -// RUN: %clang_cc1 %s -Rpass=inline -debug-info-kind=line-tables-only -emit-llvm-only -verify -fno-experimental-new-pass-manager +// RUN: %clang_cc1 %s -Rpass=inline -debug-info-kind=line-tables-only -emit-llvm-only -verify -fno-experimental-new-pass-manager -mllvm -mandatory-inlining-first=0 // The new PM inliner is not added to the default pipeline at O0, so we add // some optimizations to trigger it. -// RUN: %clang_cc1 %s -Rpass=inline -fexperimental-new-pass-manager -O1 -debug-info-kind=line-tables-only -emit-llvm-only -verify +// RUN: %clang_cc1 %s -Rpass=inline -fexperimental-new-pass-manager -O1 -debug-info-kind=line-tables-only -emit-llvm-only -verify -mllvm -mandatory-inlining-first=0 int foo(int x, int y) __attribute__((always_inline)); int foo(int x, int y) { return x + y; } diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -27,6 +27,8 @@ /// There are 3 scenarios we can use the InlineAdvisor: /// - Default - use manual heuristics. /// +/// - MandatoryOnly - only mandatory inlinings (i.e. AlwaysInline). +/// /// - Release mode, the expected mode for production, day to day deployments. /// In this mode, when building the compiler, we also compile a pre-trained ML /// model to native code, and link it as a static library. This mode has low @@ -37,7 +39,12 @@ /// requires the full C Tensorflow API library, and evaluates models /// dynamically. This mode also permits generating training logs, for offline /// training. -enum class InliningAdvisorMode : int { Default, Release, Development }; +enum class InliningAdvisorMode : int { + Default, + MandatoryOnly, + Release, + Development +}; class InlineAdvisor; /// Capture state between an inlining decision having had been made, and @@ -178,6 +185,20 @@ InlineParams Params; }; +/// Advisor recommending only mandatory (AlwaysInline) cases. +class MandatoryInlineAdvisor final : public InlineAdvisor { + std::unique_ptr getAdvice(CallBase &CB) override; + +public: + MandatoryInlineAdvisor(FunctionAnalysisManager &FAM) : InlineAdvisor(FAM) {} + + enum class MandatoryInliningKind { NotMandatory, Always, Never }; + + static MandatoryInliningKind getMandatoryKind(CallBase &CB, + FunctionAnalysisManager &FAM, + OptimizationRemarkEmitter &ORE); +}; + /// The InlineAdvisorAnalysis is a module pass because the InlineAdvisor /// needs to capture state right before inlining commences over a module. class InlineAdvisorAnalysis : public AnalysisInfoMixin { diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -344,7 +344,8 @@ /// Construct the module pipeline that performs inlining as well as /// the inlining-driven cleanups. ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, - ThinLTOPhase Phase); + ThinLTOPhase Phase, + bool MandatoryOnly); /// Construct the core LLVM module optimization pipeline. /// diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp --- a/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -158,6 +158,9 @@ case InliningAdvisorMode::Default: Advisor.reset(new DefaultInlineAdvisor(FAM, Params)); break; + case InliningAdvisorMode::MandatoryOnly: + Advisor.reset(new MandatoryInlineAdvisor(FAM)); + break; case InliningAdvisorMode::Development: #ifdef LLVM_HAVE_TF_API Advisor = @@ -437,3 +440,38 @@ return Remark; }); } + +std::unique_ptr MandatoryInlineAdvisor::getAdvice(CallBase &CB) { + auto &Caller = *CB.getCaller(); + auto &Callee = *CB.getCalledFunction(); + auto &ORE = FAM.getResult(Caller); + + bool Advice = MandatoryInliningKind::Always == + MandatoryInlineAdvisor::getMandatoryKind(CB, FAM, ORE) && + &Caller != &Callee; + return std::make_unique(this, CB, ORE, Advice); +} + +MandatoryInlineAdvisor::MandatoryInliningKind +MandatoryInlineAdvisor::getMandatoryKind(CallBase &CB, + FunctionAnalysisManager &FAM, + OptimizationRemarkEmitter &ORE) { + auto &Callee = *CB.getCalledFunction(); + + auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { + return FAM.getResult(F); + }; + + auto &TIR = FAM.getResult(Callee); + + auto TrivialDecision = + llvm::getAttributeBasedInliningDecision(CB, &Callee, TIR, GetTLI); + + if (TrivialDecision.hasValue()) { + if (TrivialDecision->isSuccess()) + return MandatoryInliningKind::Always; + else + return MandatoryInliningKind::Never; + } + return MandatoryInliningKind::NotMandatory; +} diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp --- a/llvm/lib/Analysis/MLInlineAdvisor.cpp +++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp @@ -175,25 +175,20 @@ auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return FAM.getResult(F); }; - auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { - return FAM.getResult(F); - }; - auto &TIR = FAM.getResult(Callee); auto &ORE = FAM.getResult(Caller); - auto TrivialDecision = - llvm::getAttributeBasedInliningDecision(CB, &Callee, TIR, GetTLI); - + auto MandatoryKind = MandatoryInlineAdvisor::getMandatoryKind(CB, FAM, ORE); // If this is a "never inline" case, there won't be any changes to internal // state we need to track, so we can just return the base InlineAdvice, which // will do nothing interesting. // Same thing if this is a recursive case. - if ((TrivialDecision.hasValue() && !TrivialDecision->isSuccess()) || + if (MandatoryKind == MandatoryInlineAdvisor::MandatoryInliningKind::Never || &Caller == &Callee) return std::make_unique(this, CB, ORE, false); - bool Mandatory = TrivialDecision.hasValue() && TrivialDecision->isSuccess(); + bool Mandatory = + MandatoryKind == MandatoryInlineAdvisor::MandatoryInliningKind::Always; // If we need to stop, we won't want to track anymore any state changes, so // we just return the base InlineAdvice, which acts as a noop. diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -299,6 +299,11 @@ cl::Hidden, cl::ZeroOrMore, cl::desc("Enable memory profiler")); +static cl::opt PerformMandatoryInliningsFirst( + "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc("Perform mandatory inlinings module-wide, before performing " + "inlining.")); + PipelineTuningOptions::PipelineTuningOptions() { LoopInterleaving = true; LoopVectorization = true; @@ -906,7 +911,8 @@ } ModuleInlinerWrapperPass -PassBuilder::buildInlinerPipeline(OptimizationLevel Level, ThinLTOPhase Phase) { +PassBuilder::buildInlinerPipeline(OptimizationLevel Level, ThinLTOPhase Phase, + bool MandatoryOnly) { InlineParams IP = getInlineParamsFromOptLevel(Level); if (Phase == PassBuilder::ThinLTOPhase::PreLink && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) @@ -915,8 +921,10 @@ if (PGOOpt) IP.EnableDeferral = EnablePGOInlineDeferral; - ModuleInlinerWrapperPass MIWP(IP, DebugLogging, UseInlineAdvisor, - MaxDevirtIterations); + ModuleInlinerWrapperPass MIWP( + IP, DebugLogging, + (MandatoryOnly ? InliningAdvisorMode::MandatoryOnly : UseInlineAdvisor), + MaxDevirtIterations); // Require the GlobalsAA analysis for the module so we can query it within // the CGSCC pipeline. @@ -946,6 +954,9 @@ // Now deduce any function attributes based in the current code. MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); + if (MandatoryOnly) + return MIWP; + // When at O3 add argument promotion to the pass pipeline. // FIXME: It isn't at all clear why this should be limited to O3. if (Level == OptimizationLevel::O3) @@ -1104,7 +1115,9 @@ if (EnableSyntheticCounts && !PGOOpt) MPM.addPass(SyntheticCountsPropagation()); - MPM.addPass(buildInlinerPipeline(Level, Phase)); + if (PerformMandatoryInliningsFirst) + MPM.addPass(buildInlinerPipeline(Level, Phase, /*MandatoryOnly=*/true)); + MPM.addPass(buildInlinerPipeline(Level, Phase, /*MandatoryOnly=*/false)); if (EnableMemProfiler && Phase != ThinLTOPhase::PreLink) { MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -62,6 +62,10 @@ MODULE_PASS("khwasan", HWAddressSanitizerPass(true, true)) MODULE_PASS("inferattrs", InferFunctionAttrsPass()) MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass()) +MODULE_PASS("always-inliner-wrapper", ModuleInlinerWrapperPass( + getInlineParams(), + DebugLogging, + InliningAdvisorMode::MandatoryOnly)) MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass()) MODULE_PASS("instrorderfile", InstrOrderFilePass()) MODULE_PASS("instrprof", InstrProfiling()) @@ -95,7 +99,8 @@ MODULE_PASS("rpo-function-attrs", ReversePostOrderFunctionAttrsPass()) MODULE_PASS("sample-profile", SampleProfileLoaderPass()) MODULE_PASS("scc-oz-module-inliner", - buildInlinerPipeline(OptimizationLevel::Oz, ThinLTOPhase::None)) + buildInlinerPipeline(OptimizationLevel::Oz, ThinLTOPhase::None, + /*MandatoryOnly=*/false)) MODULE_PASS("loop-extract-single", LoopExtractorPass(1)) MODULE_PASS("oz-module-optimizer", buildModuleOptimizationPipeline(OptimizationLevel::Oz, /*LTOPreLink*/false)) diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -92,11 +92,6 @@ DisableInlinedAllocaMerging("disable-inlined-alloca-merging", cl::init(false), cl::Hidden); -/// Flag to disable adding AlwaysInlinerPass to ModuleInlinerWrapperPass. -/// TODO: remove this once this has is baked in for long enough. -static cl::opt DisableAlwaysInlinerInModuleWrapper( - "disable-always-inliner-in-module-wrapper", cl::init(false), cl::Hidden); - namespace { enum class InlinerFunctionImportStatsOpts { @@ -1046,8 +1041,6 @@ return PreservedAnalyses::all(); } - if (!DisableAlwaysInlinerInModuleWrapper) - MPM.addPass(AlwaysInlinerPass()); // We wrap the CGSCC pipeline in a devirtualization repeater. This will try // to detect when we devirtualize indirect calls and iterate the SCC passes // in that case to try and catch knock-on inlining or function attrs diff --git a/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll b/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll --- a/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll +++ b/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll @@ -7,18 +7,18 @@ ; REQUIRES: have_tf_api ; ; When the bounds are very wide ("no bounds"), all inlinings happen. -; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=10.0 -disable-always-inliner-in-module-wrapper -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOBOUNDS +; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=10.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOBOUNDS ; ; When the bounds are very restrictive, the first inlining happens but it's ; considered as "bad" (since it trips over the bounds) and its reward is a ; penalty. However, the mandatory inlining, which is considered next, happens. ; No other inlinings happend. -; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=1.0 -disable-always-inliner-in-module-wrapper -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOUNDS +; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=1.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOUNDS ; ; With more restrictive bounds, the first inlining happens and is OK. The ; mandatory inlining happens next, and it trips over the bounds, which then ; forces no further inlinings. -; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=1.1 -disable-always-inliner-in-module-wrapper -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=RELAXED-BOUNDS +; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%S/../../../../lib/Analysis/models/inliner -training-log=- -enable-ml-inliner=development -ml-advisor-size-increase-threshold=1.1 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=RELAXED-BOUNDS target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-grtev4-linux-gnu" diff --git a/llvm/test/Transforms/Inline/ML/bounds-checks.ll b/llvm/test/Transforms/Inline/ML/bounds-checks.ll --- a/llvm/test/Transforms/Inline/ML/bounds-checks.ll +++ b/llvm/test/Transforms/Inline/ML/bounds-checks.ll @@ -4,7 +4,7 @@ ; factor, we don't inline anymore. ; REQUIRES: have_tf_aot ; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=release -ml-advisor-size-increase-threshold=10.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOBOUNDS -; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=release -ml-advisor-size-increase-threshold=1.0 -disable-always-inliner-in-module-wrapper -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOUNDS +; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=release -ml-advisor-size-increase-threshold=1.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOUNDS target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-grtev4-linux-gnu" diff --git a/llvm/test/Transforms/Inline/inline_stats.ll b/llvm/test/Transforms/Inline/inline_stats.ll --- a/llvm/test/Transforms/Inline/inline_stats.ll +++ b/llvm/test/Transforms/Inline/inline_stats.ll @@ -6,8 +6,11 @@ ; RUN: opt -S -passes=inline -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=CHECK-BASIC -check-prefix=CHECK ; RUN: opt -S -passes=inline -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix="CHECK-VERBOSE" -check-prefix=CHECK -; RUN: opt -S -passes=inliner-wrapper -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=WRAPPER-BASIC -check-prefix=WRAPPER -; RUN: opt -S -passes=inliner-wrapper -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix=WRAPPER-VERBOSE -check-prefix=WRAPPER +; RUN: opt -S -passes=inliner-wrapper -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=CHECK-BASIC -check-prefix=CHECK +; RUN: opt -S -passes=inliner-wrapper -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix="CHECK-VERBOSE" -check-prefix=CHECK + +; RUN: opt -S -passes=always-inliner-wrapper,inliner-wrapper -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=WRAPPER-BASIC -check-prefix=WRAPPER +; RUN: opt -S -passes=always-inliner-wrapper,inliner-wrapper -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix=WRAPPER-VERBOSE -check-prefix=WRAPPER ; CHECK: ------- Dumping inliner stats for [] ------- ; CHECK-BASIC-NOT: -- List of inlined functions: