diff --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h --- a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h @@ -27,15 +27,20 @@ ReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr OriginalAdvisor, - StringRef RemarksFile, bool EmitRemarks); + StringRef RemarksFile, bool Strict, bool EmitRemarks); std::unique_ptr getAdviceImpl(CallBase &CB) override; bool areReplayRemarksLoaded() const { return HasReplayRemarks; } + ~ReplayInlineAdvisor(); + private: - StringSet<> InlineSitesFromRemarks; std::unique_ptr OriginalAdvisor; bool HasReplayRemarks = false; + bool Strict = false; bool EmitRemarks = false; + + StringMap InlineSitesFromRemarks; + StringSet<> InlineCallersFromRemarks; }; } // namespace llvm #endif // LLVM_ANALYSIS_REPLAYINLINEADVISOR_H diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp --- a/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -47,6 +47,12 @@ cl::desc("Scale to limit the cost of inline deferral"), cl::init(2), cl::Hidden); +cl::opt InlineReplayStrict( + "inline-replay-strict", cl::init(false), + cl::desc( + "Inline remarks are only replayed on called functions, otherwise defaults to original advisor."), + cl::Hidden); + extern cl::opt InlinerFunctionImportStats; void DefaultInlineAdvice::recordUnsuccessfulInliningImpl( @@ -164,7 +170,7 @@ if (!ReplayFile.empty()) { Advisor = std::make_unique( M, FAM, M.getContext(), std::move(Advisor), ReplayFile, - /* EmitRemarks =*/true); + InlineReplayStrict, /* EmitRemarks =*/true); } break; case InliningAdvisorMode::Development: @@ -460,7 +466,7 @@ ImportedFunctionsStats = std::make_unique(); ImportedFunctionsStats->setModuleInfo(M); - } +} } InlineAdvisor::~InlineAdvisor() { diff --git a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp --- 
a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp +++ b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp @@ -25,9 +25,9 @@ ReplayInlineAdvisor::ReplayInlineAdvisor( Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr OriginalAdvisor, StringRef RemarksFile, - bool EmitRemarks) + bool Strict, bool EmitRemarks) : InlineAdvisor(M, FAM), OriginalAdvisor(std::move(OriginalAdvisor)), - HasReplayRemarks(false), EmitRemarks(EmitRemarks) { + HasReplayRemarks(false), Strict(Strict), EmitRemarks(EmitRemarks) { auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile); std::error_code EC = BufferOrErr.getError(); if (EC) { @@ -36,48 +36,78 @@ } // Example for inline remarks to parse: - // main:3:1.1: '_Z3subii' inlined into 'main' at callsite sum:1 @ main:3:1.1 + // main:3:1.1: '_Z3subii' inlined into 'main' at callsite sum:1 @ main:3:1.1; // We use the callsite string after `at callsite` to replay inlining. line_iterator LineIt(*BufferOrErr.get(), /*SkipBlanks=*/true); for (; !LineIt.is_at_eof(); ++LineIt) { StringRef Line = *LineIt; auto Pair = Line.split(" at callsite "); - StringRef Callee = Pair.first.split(" inlined into") + StringRef Callee = Pair.first.split(" inlined into ") .first.rsplit(": '") .second.drop_back(); + StringRef Caller = Pair.first.split(" inlined into ") + .second.rsplit("'") + .first.drop_front(); auto CallSite = Pair.second.split(";").first; if (Callee.empty() || CallSite.empty()) continue; std::string Combined = (Callee + CallSite).str(); - InlineSitesFromRemarks.insert(Combined); + InlineSitesFromRemarks[Combined] = false; + if (Strict) + InlineCallersFromRemarks.insert(Caller); } HasReplayRemarks = true; } +ReplayInlineAdvisor::~ReplayInlineAdvisor() { + if (HasReplayRemarks && Strict) { + for (const auto & Remark : InlineSitesFromRemarks) { + if (!Remark.second) { + LLVM_DEBUG(dbgs() << "Inline Replay Strict: Did not apply " << Remark.first() << "\n"); + } + } + } +} + std::unique_ptr ReplayInlineAdvisor::getAdviceImpl(CallBase 
&CB) { assert(HasReplayRemarks); Function &Caller = *CB.getCaller(); auto &ORE = FAM.getResult(Caller); - if (InlineSitesFromRemarks.empty()) - return std::make_unique(this, CB, None, ORE, - EmitRemarks); + if (!Strict && InlineSitesFromRemarks.empty()) + return std::make_unique(this, CB, None, ORE, EmitRemarks); + + Optional InlineRecommended; - std::string CallSiteLoc = getCallSiteLocation(CB.getDebugLoc()); - StringRef Callee = CB.getCalledFunction()->getName(); - std::string Combined = (Callee + CallSiteLoc).str(); - auto Iter = InlineSitesFromRemarks.find(Combined); + auto GetReplayAdvice = [&](){ + std::string CallSiteLoc = getCallSiteLocation(CB.getDebugLoc()); + StringRef Callee = CB.getCalledFunction()->getName(); + std::string Combined = (Callee + CallSiteLoc).str(); + + auto Iter = InlineSitesFromRemarks.find(Combined); + if (Iter != InlineSitesFromRemarks.end()) { + Iter->second = true; + InlineRecommended = llvm::InlineCost::getAlways("previously inlined"); + } + }; - Optional InlineRecommended = None; - if (Iter != InlineSitesFromRemarks.end()) { - InlineRecommended = llvm::InlineCost::getAlways("found in replay"); + if (Strict) { + if (InlineCallersFromRemarks.count(Caller.getName())) + GetReplayAdvice(); + else { + if (OriginalAdvisor) + return OriginalAdvisor->getAdvice(CB); + return {}; + } + } + else + GetReplayAdvice(); - return std::make_unique(this, CB, InlineRecommended, ORE, - EmitRemarks); + return std::make_unique(this, CB, InlineRecommended, ORE, EmitRemarks); } diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -100,6 +100,8 @@ "by inlining from cgscc inline remarks."), cl::Hidden); +extern cl::opt InlineReplayStrict; + static cl::opt InlineEnablePriorityOrder( "inline-enable-priority-order", cl::Hidden, cl::init(false), cl::desc("Enable the priority inline order for the inliner")); @@
-665,6 +667,7 @@ OwnedAdvisor = std::make_unique( M, FAM, M.getContext(), std::move(OwnedAdvisor), CGSCCInlineReplayFile, + InlineReplayStrict, /*EmitRemarks=*/true); return *OwnedAdvisor; @@ -827,8 +830,12 @@ } auto Advice = Advisor.getAdvice(*CB, OnlyMandatory); + // Check whether we want to inline this callsite. - if (!Advice->isInliningRecommended()) { + // A null advice has nothing to record; skip the call site. + if (!Advice) + continue; + if (!Advice->isInliningRecommended()) { Advice->recordUnattemptedInlining(); continue; } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -237,6 +237,8 @@ "by inlining from sample profile loader."), cl::Hidden); +extern cl::opt InlineReplayStrict; + static cl::opt MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore, @@ -1272,12 +1274,14 @@ std::unique_ptr Advice = nullptr; if (ExternalInlineAdvisor) { Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr); - if (!Advice->isInliningRecommended()) { - Advice->recordUnattemptedInlining(); - return InlineCost::getNever("not previously inlined"); + if (Advice) { + if (!Advice->isInliningRecommended()) { + Advice->recordUnattemptedInlining(); + return InlineCost::getNever("not previously inlined"); + } + Advice->recordInlining(); + return InlineCost::getAlways("previously inlined"); } - Advice->recordInlining(); - return InlineCost::getAlways("previously inlined"); } // Adjust threshold based on call site hotness, only do this for callsite @@ -1835,7 +1839,7 @@ if (FAM && !ProfileInlineReplayFile.empty()) { ExternalInlineAdvisor = std::make_unique( M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr, ProfileInlineReplayFile, - /*EmitRemarks=*/false); + InlineReplayStrict, /*EmitRemarks=*/false); if (!ExternalInlineAdvisor->areReplayRemarksLoaded()) ExternalInlineAdvisor.reset(); } diff --git a/llvm/test/Transforms/Inline/cgscc-inline-replay.ll
b/llvm/test/Transforms/Inline/cgscc-inline-replay.ll --- a/llvm/test/Transforms/Inline/cgscc-inline-replay.ll +++ b/llvm/test/Transforms/Inline/cgscc-inline-replay.ll @@ -1,8 +1,24 @@ ;; Note that this needs new pass manager for now. Passing `-cgscc-inline-replay` to legacy pass manager is a no-op. +;; Check baseline inline decisions +; RUN: opt < %s -passes=inline -pass-remarks=inline --disable-output 2>&1 | FileCheck -check-prefix=DEFAULT %s + ;; Check replay inline decisions -; RUN: opt < %s -passes=inline -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=DEFAULT %s -; RUN: opt < %s -passes=inline -cgscc-inline-replay=%S/Inputs/cgscc-inline-replay.txt -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=REPLAY %s +; RUN: opt < %s -passes=inline -cgscc-inline-replay=%S/Inputs/cgscc-inline-replay.txt -pass-remarks=inline --disable-output 2>&1 | FileCheck -check-prefix=REPLAY %s + +;; Check strict replay inline decisions +; RUN: opt < %s -passes=inline -cgscc-inline-replay=%S/Inputs/cgscc-inline-replay.txt -inline-replay-strict -debug-only=inline-replay -pass-remarks=inline --disable-output 2>&1 | FileCheck -check-prefix=REPLAY-STRICT %s + +; DEFAULT: '_Z3subii' inlined into '_Z3sumii' with (cost={{[-0-9]+}} +; DEFAULT: '_Z3sumii' inlined into 'main' with (cost={{[-0-9]+}} +; DEFAULT-NOT: '_Z3subii' inlined into 'main' + +; REPLAY: '_Z3sumii' inlined into 'main' with (cost=always) +; REPLAY: '_Z3subii' inlined into 'main' with (cost=always) + +; REPLAY-STRICT: '_Z3subii' inlined into '_Z3sumii' with (cost={{[-0-9]+}} +; REPLAY-STRICT: '_Z3sumii' inlined into 'main' with (cost=always) +; REPLAY-STRICT: Inline Replay Strict: Did not apply _Z3subii_Z3sumii:1:0 @ main:3:0.1 @.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 @@ -109,11 +125,3 @@ !24 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 6) !25 = !DILocation(line: 11, scope: !12) !26 = !DILocation(line: 12, scope: !12) - -; DEFAULT: '_Z3subii' inlined into '_Z3sumii' 
-; DEFAULT: '_Z3sumii' inlined into 'main' -; DEFAULT-NOT: '_Z3subii' inlined into 'main' - -; REPLAY: '_Z3sumii' inlined into 'main' -; REPLAY: '_Z3subii' inlined into 'main' -; REPLAY-NOT: '_Z3subii' inlined into '_Z3sumii' diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-replay-strict.txt b/llvm/test/Transforms/SampleProfile/Inputs/inline-replay-strict.txt new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-replay-strict.txt @@ -0,0 +1 @@ +remark: calls.cc:10:0: '_Z3sumii' inlined into 'main' to match profiling context with (cost=45, threshold=337) at callsite main:3:0.1; \ No newline at end of file diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt b/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt --- a/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt +++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt @@ -1,2 +1,2 @@ remark: calls.cc:10:0: '_Z3sumii' inlined into 'main' to match profiling context with (cost=45, threshold=337) at callsite main:3:0.1; -remark: calls.cc:4:0: '_Z3subii' inlined into 'main' to match profiling context with (cost=-5, threshold=337) at callsite _Z3sumii:1:0 @ main:3:0.1; +remark: calls.cc:4:0: '_Z3subii' inlined into 'main' to match profiling context with (cost=-5, threshold=337) at callsite _Z3sumii:1:0 @ main:3:0.1; \ No newline at end of file diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-topdown-inline-all.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline-topdown-inline-all.prof new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-topdown-inline-all.prof @@ -0,0 +1,10 @@ +main:225715:0 + 2.1: 5553 + 3: 5391 + 3.1: _Z3sumii:50000 + 1: _Z3subii:50000 + 1: 0 + +_Z3sumii:6010:50000 + 1: _Z3subii:60000 + 1: 9 \ No newline at end of file diff --git a/llvm/test/Transforms/SampleProfile/inline-replay.ll b/llvm/test/Transforms/SampleProfile/inline-replay.ll --- 
a/llvm/test/Transforms/SampleProfile/inline-replay.ll +++ b/llvm/test/Transforms/SampleProfile/inline-replay.ll @@ -6,6 +6,31 @@ ;; Check replay inline decisions ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-inline-replay=%S/Inputs/inline-replay.txt -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=REPLAY %s +;; Check baseline inline decisions with "inline-topdown-inline-all.prof" which inlines all sites +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown-inline-all.prof -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=DEFAULT-ALL %s + +;; Check strict replay inline decisions with "inline-topdown-inline-all.prof" and the replay file "inline-replay-strict.txt", which only contains: '_Z3sumii' inlined into 'main' +;; 1. _Z3sumii is inlined into main, but all other inline candidates in main (e.g. _Z3subii) are not inlined +;; 2.
Inline decisions made in other functions match default sample inlining, in this case _Z3subii is inlined into _Z3sumii +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown-inline-all.prof -sample-profile-inline-replay=%S/Inputs/inline-replay-strict.txt -inline-replay-strict -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=REPLAY-ALL-STRICT %s + + +; DEFAULT: '_Z3sumii' inlined into 'main' to match profiling context with (cost={{[-0-9]+}} +; DEFAULT: '_Z3subii' inlined into '_Z3sumii' to match profiling context with (cost={{[-0-9]+}} +; DEFAULT-NOT: '_Z3subii' inlined into 'main' + +; REPLAY: '_Z3sumii' inlined into 'main' to match profiling context with (cost=always) +; REPLAY: '_Z3subii' inlined into 'main' to match profiling context with (cost=always) +; REPLAY-NOT: '_Z3subii' inlined into '_Z3sumii' + +; DEFAULT-ALL: '_Z3sumii' inlined into 'main' to match profiling context with (cost={{[-0-9]+}} +; DEFAULT-ALL: '_Z3subii' inlined into 'main' to match profiling context with (cost={{[-0-9]+}} +; DEFAULT-ALL: '_Z3subii' inlined into '_Z3sumii' to match profiling context with (cost={{[-0-9]+}} + +; REPLAY-ALL-STRICT: '_Z3sumii' inlined into 'main' to match profiling context with (cost=always) +; REPLAY-ALL-STRICT-NOT: '_Z3subii' inlined into 'main' to match profiling context with (cost={{[-0-9]+}} +; REPLAY-ALL-STRICT: '_Z3subii' inlined into '_Z3sumii' to match profiling context with (cost={{[-0-9]+}} + @.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 { @@ -111,12 +136,3 @@ !24 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 6) !25 = !DILocation(line: 11, scope: !12) !26 = !DILocation(line: 12, scope: !12) - - -; DEFAULT: '_Z3sumii' inlined into 'main' -; DEFAULT: '_Z3subii' inlined into '_Z3sumii' -; DEFAULT-NOT: '_Z3subii' inlined into 'main' - -; REPLAY: '_Z3sumii' inlined into
'main' -; REPLAY: '_Z3subii' inlined into 'main' -; REPLAY-NOT: '_Z3subii' inlined into '_Z3sumii'