[Inliner] Add -cgscc-inline-replay: replay inline decisions in the CGSCC inliner

Summary of the changes in this patch:
  * ReplayInlineAdvisor takes a new `EmitRemarks` constructor argument and
    stores it. Parsed remarks are now keyed by "<callee><call-site string>"
    (callee taken from the text before " inlined into", call site taken from
    the text after " at callsite " up to the first ';') instead of by the
    call-site string alone.
  * A file-local ReplayInlineAdvice carries an InlineCost (always / never)
    for each decision; it emits "inlined into" remarks only when EmitRemarks
    is set, and emits a missed-optimization remark when inlining fails.
  * Inliner.cpp gains the hidden option -cgscc-inline-replay=<filename>. When
    it is non-empty, InlinerPass::getAdvisor lazily constructs a
    ReplayInlineAdvisor (EmitRemarks=true) and returns it before consulting
    the cached InlineAdvisorAnalysis result.
  * SampleProfile.cpp passes EmitRemarks=false to keep its existing behaviour.
  * New test: llvm/test/Transforms/Inline/cgscc-inline-replay.ll with its
    input remarks file.

diff --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h
--- a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h
@@ -25,13 +25,14 @@
 class ReplayInlineAdvisor : public InlineAdvisor {
 public:
   ReplayInlineAdvisor(FunctionAnalysisManager &FAM, LLVMContext &Context,
-                      StringRef RemarksFile);
+                      StringRef RemarksFile, bool EmitRemarks);
   std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB) override;
   bool areReplayRemarksLoaded() const { return HasReplayRemarks; }
 
 private:
   StringSet<> InlineSitesFromRemarks;
   bool HasReplayRemarks = false;
+  bool EmitRemarks = false;
 };
 } // namespace llvm
 #endif // LLVM_ANALYSIS_REPLAYINLINEADVISOR_H
diff --git a/llvm/include/llvm/Transforms/IPO/Inliner.h b/llvm/include/llvm/Transforms/IPO/Inliner.h
--- a/llvm/include/llvm/Transforms/IPO/Inliner.h
+++ b/llvm/include/llvm/Transforms/IPO/Inliner.h
@@ -14,6 +14,7 @@
 #include "llvm/Analysis/InlineAdvisor.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/ReplayInlineAdvisor.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
 #include <utility>
@@ -109,6 +110,8 @@
                             FunctionAnalysisManager &FAM, Module &M);
   std::unique_ptr<ImportedFunctionsInliningStatistics> ImportedFunctionsStats;
   Optional<DefaultInlineAdvisor> OwnedDefaultAdvisor;
+  // External inline advisor used to replay inline decisions from remarks.
+  Optional<ReplayInlineAdvisor> ReplayAdvisor;
 };
 
 /// Module pass, wrapping the inliner pass. This works in conjunction with the
diff --git a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
--- a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
@@ -22,8 +22,9 @@
 
 ReplayInlineAdvisor::ReplayInlineAdvisor(FunctionAnalysisManager &FAM,
                                          LLVMContext &Context,
-                                         StringRef RemarksFile)
-    : InlineAdvisor(FAM), HasReplayRemarks(false) {
+                                         StringRef RemarksFile,
+                                         bool EmitRemarks)
+    : InlineAdvisor(FAM), HasReplayRemarks(false), EmitRemarks(EmitRemarks) {
   auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile);
   std::error_code EC = BufferOrErr.getError();
   if (EC) {
@@ -32,19 +33,66 @@
   }
 
   // Example for inline remarks to parse:
-  // _Z3subii inlined into main [details] at callsite sum:1 @ main:3.1
+  // main:3:1.1: _Z3subii inlined into main at callsite sum:1 @ main:3:1.1
   // We use the callsite string after `at callsite` to replay inlining.
   line_iterator LineIt(*BufferOrErr.get(), /*SkipBlanks=*/true);
   for (; !LineIt.is_at_eof(); ++LineIt) {
     StringRef Line = *LineIt;
     auto Pair = Line.split(" at callsite ");
-    if (Pair.second.empty())
+
+    auto Callee = Pair.first.split(" inlined into").first.rsplit(": ").second;
+
+    auto CallSite = Pair.second.split(";").first;
+
+    if (Callee.empty() || CallSite.empty())
       continue;
-    InlineSitesFromRemarks.insert(Pair.second);
+
+    std::string Combined = (Callee + CallSite).str();
+    InlineSitesFromRemarks.insert(Combined);
   }
+
   HasReplayRemarks = true;
 }
 
+namespace {
+class ReplayInlineAdvice : public InlineAdvice {
+public:
+  ReplayInlineAdvice(InlineAdvisor *Advisor, CallBase &CB, InlineCost OIC,
+                     OptimizationRemarkEmitter &ORE, bool EmitRemarks)
+      : InlineAdvice(Advisor, CB, ORE, OIC.isAlways()), OriginalCB(&CB),
+        OIC(OIC), EmitRemarks(EmitRemarks) {}
+
+private:
+  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
+    using namespace ore;
+    llvm::setInlineRemark(*OriginalCB, std::string(Result.getFailureReason()) +
+                                           "; " + inlineCostStr(*OIC));
+    ORE.emit([&]() {
+      return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
+             << NV("Callee", Callee) << " will not be inlined into "
+             << NV("Caller", Caller) << ": "
+             << NV("Reason", Result.getFailureReason());
+    });
+  }
+
+  void recordInliningWithCalleeDeletedImpl() override {
+    if (EmitRemarks)
+      emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC);
+  }
+
+  void recordInliningImpl() override {
+    if (EmitRemarks)
+      emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC);
+  }
+
+private:
+  CallBase *const OriginalCB;
+  Optional<InlineCost> OIC;
+  bool EmitRemarks;
+};
+
+} // namespace
+
 std::unique_ptr<InlineAdvice> ReplayInlineAdvisor::getAdvice(CallBase &CB) {
   assert(HasReplayRemarks);
 
@@ -52,9 +100,19 @@
   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
 
   if (InlineSitesFromRemarks.empty())
-    return std::make_unique<InlineAdvice>(this, CB, ORE, false);
+    return std::make_unique<ReplayInlineAdvice>(
+        this, CB, llvm::InlineCost::getNever("replay empty"), ORE, EmitRemarks);
 
   std::string CallSiteLoc = getCallSiteLocation(CB.getDebugLoc());
-  bool InlineRecommended = InlineSitesFromRemarks.count(CallSiteLoc) > 0;
-  return std::make_unique<InlineAdvice>(this, CB, ORE, InlineRecommended);
+  StringRef Callee = CB.getCalledFunction()->getName();
+  std::string Combined = (Callee + CallSiteLoc).str();
+  auto Iter = InlineSitesFromRemarks.find(Combined);
+
+  auto InlineRecommended = llvm::InlineCost::getNever("not found in replay");
+  if (Iter != InlineSitesFromRemarks.end()) {
+    InlineRecommended = llvm::InlineCost::getAlways("found in replay");
+  }
+
+  return std::make_unique<ReplayInlineAdvice>(this, CB, InlineRecommended, ORE,
+                                              EmitRemarks);
 }
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -90,6 +90,13 @@
     DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
                                 cl::init(false), cl::Hidden);
 
+static cl::opt<std::string> CGSCCInlineReplayFile(
+    "cgscc-inline-replay", cl::init(""), cl::value_desc("filename"),
+    cl::desc(
+        "Optimization remarks file containing inline remarks to be replayed "
+        "by cgscc inlining."),
+    cl::Hidden);
+
 namespace {
 
 enum class InlinerFunctionImportStatsOpts {
@@ -658,6 +665,15 @@
 InlineAdvisor &
 InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
                         FunctionAnalysisManager &FAM, Module &M) {
+
+  if (!CGSCCInlineReplayFile.empty()) {
+    if (!ReplayAdvisor)
+      ReplayAdvisor.emplace(FAM, M.getContext(), CGSCCInlineReplayFile,
+                            /*EmitRemarks=*/true);
+
+    return *ReplayAdvisor;
+  }
+
   auto *IAA = MAM.getCachedResult<InlineAdvisorAnalysis>(M);
   if (!IAA) {
     // It should still be possible to run the inliner as a stand-alone SCC pass,
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -1960,7 +1960,7 @@
 
   if (FAM && !ProfileInlineReplayFile.empty()) {
     ExternalInlineAdvisor = std::make_unique<ReplayInlineAdvisor>(
-        *FAM, Ctx, ProfileInlineReplayFile);
+        *FAM, Ctx, ProfileInlineReplayFile, /*EmitRemarks=*/false);
     if (!ExternalInlineAdvisor->areReplayRemarksLoaded())
       ExternalInlineAdvisor.reset();
   }
diff --git a/llvm/test/Transforms/Inline/Inputs/cgscc-inline-replay.txt b/llvm/test/Transforms/Inline/Inputs/cgscc-inline-replay.txt
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Inline/Inputs/cgscc-inline-replay.txt
@@ -0,0 +1,2 @@
+remark: calls.cc:10:0: _Z3sumii inlined into main with (cost=45, threshold=337) at callsite main:3:0.1;
+remark: calls.cc:4:0: _Z3subii inlined into main with (cost=-5, threshold=337) at callsite _Z3sumii:1:0 @ main:3:0.1;
diff --git a/llvm/test/Transforms/Inline/cgscc-inline-replay.ll b/llvm/test/Transforms/Inline/cgscc-inline-replay.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Inline/cgscc-inline-replay.ll
@@ -0,0 +1,114 @@
+;; Note that this needs new pass manager for now. Passing `-cgscc-inline-replay` to legacy pass manager is a no-op.
+
+;; Check replay inline decisions
+; RUN: opt < %s -passes=inline -cgscc-inline-replay=%S/Inputs/cgscc-inline-replay.txt -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=REPLAY %s
+
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
+
+define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4, !dbg !8
+  %tmp1 = load i32, i32* %y.addr, align 4, !dbg !8
+  %add = add nsw i32 %tmp, %tmp1, !dbg !8
+  %tmp2 = load i32, i32* %x.addr, align 4, !dbg !8
+  %tmp3 = load i32, i32* %y.addr, align 4, !dbg !8
+  %call = call i32 @_Z3subii(i32 %tmp2, i32 %tmp3), !dbg !8
+  ret i32 %add, !dbg !8
+}
+
+define i32 @_Z3subii(i32 %x, i32 %y) #0 !dbg !9 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4, !dbg !10
+  %tmp1 = load i32, i32* %y.addr, align 4, !dbg !10
+  %add = sub nsw i32 %tmp, %tmp1, !dbg !10
+  ret i32 %add, !dbg !11
+}
+
+define i32 @main() #0 !dbg !12 {
+entry:
+  %retval = alloca i32, align 4
+  %s = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4, !dbg !13
+  br label %while.cond, !dbg !14
+
+while.cond: ; preds = %if.end, %entry
+  %tmp = load i32, i32* %i, align 4, !dbg !15
+  %inc = add nsw i32 %tmp, 1, !dbg !15
+  store i32 %inc, i32* %i, align 4, !dbg !15
+  %cmp = icmp slt i32 %tmp, 400000000, !dbg !15
+  br i1 %cmp, label %while.body, label %while.end, !dbg !15
+
+while.body: ; preds = %while.cond
+  %tmp1 = load i32, i32* %i, align 4, !dbg !17
+  %cmp1 = icmp ne i32 %tmp1, 100, !dbg !17
+  br i1 %cmp1, label %if.then, label %if.else, !dbg !17
+
+if.then: ; preds = %while.body
+  %tmp2 = load i32, i32* %i, align 4, !dbg !19
+  %tmp3 = load i32, i32* %s, align 4, !dbg !19
+  %call = call i32 @_Z3sumii(i32 %tmp2, i32 %tmp3), !dbg !19
+  store i32 %call, i32* %s, align 4, !dbg !19
+  br label %if.end, !dbg !19
+
+if.else: ; preds = %while.body
+  store i32 30, i32* %s, align 4, !dbg !21
+  br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+  br label %while.cond, !dbg !23
+
+while.end: ; preds = %while.cond
+  %tmp4 = load i32, i32* %s, align 4, !dbg !25
+  %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %tmp4), !dbg !25
+  ret i32 0, !dbg !26
+}
+
+declare i32 @printf(i8*, ...)
+
+attributes #0 = { "use-sample-profile" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 1, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 3.5 "}
+!6 = distinct !DISubprogram(name: "sum", linkageName: "_Z3sumii", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!7 = !DISubroutineType(types: !2)
+!8 = !DILocation(line: 4, scope: !6)
+!9 = distinct !DISubprogram(name: "sub", linkageName: "_Z3subii", scope: !1, file: !1, line: 20, type: !7, scopeLine: 20, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!10 = !DILocation(line: 20, scope: !9)
+!11 = !DILocation(line: 21, scope: !9)
+!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!13 = !DILocation(line: 8, scope: !12)
+!14 = !DILocation(line: 9, scope: !12)
+!15 = !DILocation(line: 9, scope: !16)
+!16 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 2)
+!17 = !DILocation(line: 10, scope: !18)
+!18 = distinct !DILexicalBlock(scope: !12, file: !1, line: 10)
+!19 = !DILocation(line: 10, scope: !20)
+!20 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 2)
+!21 = !DILocation(line: 10, scope: !22)
+!22 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 4)
+!23 = !DILocation(line: 10, scope: !24)
+!24 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 6)
+!25 = !DILocation(line: 11, scope: !12)
+!26 = !DILocation(line: 12, scope: !12)
+
+; REPLAY: _Z3sumii inlined into main
+; REPLAY: _Z3subii inlined into main
+; REPLAY-NOT: _Z3subii inlined into _Z3sumii