diff --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h @@ -0,0 +1,37 @@ +//===- ReplayInlineAdvisor.h - Replay Inline Advisor interface -*- C++ --*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_REPLAYINLINEADVISOR_H_ +#define LLVM_REPLAYINLINEADVISOR_H_ + +#include "llvm/ADT/StringSet.h" +#include "llvm/Analysis/InlineAdvisor.h" +#include "llvm/IR/LLVMContext.h" + +namespace llvm { +class BasicBlock; +class CallBase; +class Function; +class Module; +class OptimizationRemarkEmitter; + +/// Replay inline advisor that uses optimization remarks from inlining of +/// previous build to guide current inlining. This is useful for inliner tuning. 
+class ReplayInlineAdvisor : public InlineAdvisor { +public: + ReplayInlineAdvisor(FunctionAnalysisManager &FAM, LLVMContext &Context, + StringRef RemarksFile); + std::unique_ptr getAdvice(CallBase &CB) override; + bool areReplayRemarksLoaded() const { return HasReplayRemarks; } + +private: + StringSet<> InlineSitesFromRemarks; + bool HasReplayRemarks = false; +}; +} // namespace llvm +#endif // LLVM_REPLAYINLINEADVISOR_H_ \ No newline at end of file diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -98,6 +98,7 @@ RegionInfo.cpp RegionPass.cpp RegionPrinter.cpp + ReplayInlineAdvisor.cpp ScalarEvolution.cpp ScalarEvolutionAliasAnalysis.cpp ScalarEvolutionDivision.cpp diff --git a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp @@ -0,0 +1,83 @@ +//===- ReplayInlineAdvisor.cpp - Replay InlineAdvisor ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements ReplayInlineAdvisor that replays inline decision based +// on previous inline remarks from optimization remark log. 
+// +//===----------------------------------------------------------------------===// +#include + +#include "llvm/Analysis/InlineAdvisor.h" +#include "llvm/Analysis/ReplayInlineAdvisor.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/LineIterator.h" + +using namespace llvm; + +#define DEBUG_TYPE "inline-replay" + +ReplayInlineAdvisor::ReplayInlineAdvisor(FunctionAnalysisManager &FAM, + LLVMContext &Context, + StringRef RemarksFile) + : InlineAdvisor(FAM), HasReplayRemarks(false) { + auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile); + std::error_code EC = BufferOrErr.getError(); + if (EC) { + Context.emitError("Could not open remarks file: " + EC.message()); + return; + } + + line_iterator LineIt(*BufferOrErr.get(), /*SkipBlanks=*/true); + for (; !LineIt.is_at_eof(); ++LineIt) { + StringRef Line = *LineIt; + auto Pair = Line.split(" at callsite "); + if (Pair.second.empty()) + continue; + InlineSitesFromRemarks.insert(Pair.second); + } + HasReplayRemarks = true; +} + +std::unique_ptr ReplayInlineAdvisor::getAdvice(CallBase &CB) { + assert(HasReplayRemarks); + + Function &Caller = *CB.getCaller(); + auto &ORE = FAM.getResult(Caller); + + if (InlineSitesFromRemarks.empty()) + return std::make_unique(this, CB, ORE, false); + + // Example for inline remarks to parse: + // _Z3subii inlined into main [details] at callsite sum:1 @ main:3.1 + // We use the callsite string after `at callsite` to replay inlining. + std::ostringstream CallSiteLoc; + auto DLoc = CB.getDebugLoc(); + bool First = true; + for (DILocation *DIL = DLoc.get(); DIL; DIL = DIL->getInlinedAt()) { + if (!First) + CallSiteLoc << " @ "; + // Note that negative line offset is actually possible, but we use + // unsigned int to match line offset representation in remarks so + // it's directly consumable by relay advisor. 
+ uint32_t Offset = + DIL->getLine() - DIL->getScope()->getSubprogram()->getLine(); + uint32_t Discriminator = DIL->getBaseDiscriminator(); + StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); + if (Name.empty()) + Name = DIL->getScope()->getSubprogram()->getName(); + CallSiteLoc << Name.str() << ":" << llvm::utostr(Offset); + if (Discriminator) { + CallSiteLoc << "." << llvm::utostr(Discriminator); + } + First = false; + } + + bool InlineRecommended = InlineSitesFromRemarks.count(CallSiteLoc.str()) > 0; + return std::make_unique(this, CB, ORE, InlineRecommended); +} diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -43,6 +43,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" @@ -170,6 +171,13 @@ "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites")); +static cl::opt ProfileInlineReplayFile( + "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), + cl::desc( + "Optimization remarks file containing inline remarks to be replayed " + "by inlining from sample profile loader."), + cl::Hidden); + namespace { using BlockWeightMap = DenseMap; @@ -319,7 +327,7 @@ RemappingFilename(std::string(RemapName)), IsThinLTOPreLink(IsThinLTOPreLink) {} - bool doInitialization(Module &M); + bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr); bool runOnModule(Module &M, ModuleAnalysisManager *AM, ProfileSummaryInfo *_PSI, CallGraph *CG); @@ -473,6 +481,9 @@ // overriden by -profile-sample-accurate or profile-sample-accurate // attribute. 
bool ProfAccForSymsInList; + + // External inline advisor used to replay inline decision from remarks. + std::unique_ptr ExternalInlineAdvisor; }; class SampleProfileLoaderLegacyPass : public ModulePass { @@ -898,6 +909,16 @@ } bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) { + if (ExternalInlineAdvisor) { + auto Advice = ExternalInlineAdvisor->getAdvice(CB); + if (!Advice->isInliningRecommended()) { + Advice->recordUnattemptedInlining(); + return false; + } + // Dummy record, we don't use it for replay. + Advice->recordInlining(); + } + Function *CalledFunction = CB.getCalledFunction(); assert(CalledFunction); DebugLoc DLoc = CB.getDebugLoc(); @@ -1005,7 +1026,7 @@ } } } - if (Hot) { + if (Hot || ExternalInlineAdvisor) { CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end()); emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true); } else { @@ -1818,7 +1839,8 @@ return FunctionOrderList; } -bool SampleProfileLoader::doInitialization(Module &M) { +bool SampleProfileLoader::doInitialization(Module &M, + FunctionAnalysisManager *FAM) { auto &Ctx = M.getContext(); std::unique_ptr RemapReader; @@ -1843,6 +1865,13 @@ NamesInProfile.insert(NameTable->begin(), NameTable->end()); } + if (FAM && !ProfileInlineReplayFile.empty()) { + ExternalInlineAdvisor = std::make_unique( + *FAM, Ctx, ProfileInlineReplayFile); + if (!ExternalInlineAdvisor->areReplayRemarksLoaded()) + ExternalInlineAdvisor.reset(); + } + return true; } @@ -1995,7 +2024,7 @@ : ProfileRemappingFileName, IsThinLTOPreLink, GetAssumptionCache, GetTTI, GetTLI); - if (!SampleLoader.doInitialization(M)) + if (!SampleLoader.doInitialization(M, &FAM)) return PreservedAnalyses::all(); ProfileSummaryInfo *PSI = &AM.getResult(M); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt b/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt @@ -0,0 +1,2 @@ 
+remark: calls.cc:10:0: _Z3sumii inlined into main to match profiling context with (cost=45, threshold=337) at callsite main:3.1 +remark: calls.cc:4:0: _Z3subii inlined into main to match profiling context with (cost=-5, threshold=337) at callsite sum:1 @ main:3.1 diff --git a/llvm/test/Transforms/SampleProfile/inline-replay.ll b/llvm/test/Transforms/SampleProfile/inline-replay.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/inline-replay.ll @@ -0,0 +1,122 @@ +;; Note that this needs new pass manager for now. Passing `-sample-profile-inline-replay` to legacy pass manager is a no-op. + +;; Check baseline inline decisions +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=DEFAULT %s + +;; Check replay inline decisions +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-inline-replay=%S/Inputs/inline-replay.txt -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=REPLAY %s + +@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 + +define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %tmp = load i32, i32* %x.addr, align 4, !dbg !8 + %tmp1 = load i32, i32* %y.addr, align 4, !dbg !8 + %add = add nsw i32 %tmp, %tmp1, !dbg !8 + %tmp2 = load i32, i32* %x.addr, align 4, !dbg !8 + %tmp3 = load i32, i32* %y.addr, align 4, !dbg !8 + %call = call i32 @_Z3subii(i32 %tmp2, i32 %tmp3), !dbg !8 + ret i32 %add, !dbg !8 +} + +define i32 @_Z3subii(i32 %x, i32 %y) #0 !dbg !9 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 
+ %tmp = load i32, i32* %x.addr, align 4, !dbg !10 + %tmp1 = load i32, i32* %y.addr, align 4, !dbg !10 + %add = sub nsw i32 %tmp, %tmp1, !dbg !10 + ret i32 %add, !dbg !11 +} + +define i32 @main() #0 !dbg !12 { +entry: + %retval = alloca i32, align 4 + %s = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %i, align 4, !dbg !13 + br label %while.cond, !dbg !14 + +while.cond: ; preds = %if.end, %entry + %tmp = load i32, i32* %i, align 4, !dbg !15 + %inc = add nsw i32 %tmp, 1, !dbg !15 + store i32 %inc, i32* %i, align 4, !dbg !15 + %cmp = icmp slt i32 %tmp, 400000000, !dbg !15 + br i1 %cmp, label %while.body, label %while.end, !dbg !15 + +while.body: ; preds = %while.cond + %tmp1 = load i32, i32* %i, align 4, !dbg !17 + %cmp1 = icmp ne i32 %tmp1, 100, !dbg !17 + br i1 %cmp1, label %if.then, label %if.else, !dbg !17 + +if.then: ; preds = %while.body + %tmp2 = load i32, i32* %i, align 4, !dbg !19 + %tmp3 = load i32, i32* %s, align 4, !dbg !19 + %call = call i32 @_Z3sumii(i32 %tmp2, i32 %tmp3), !dbg !19 + store i32 %call, i32* %s, align 4, !dbg !19 + br label %if.end, !dbg !19 + +if.else: ; preds = %while.body + store i32 30, i32* %s, align 4, !dbg !21 + br label %if.end + +if.end: ; preds = %if.else, %if.then + br label %while.cond, !dbg !23 + +while.end: ; preds = %while.cond + %tmp4 = load i32, i32* %s, align 4, !dbg !25 + %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %tmp4), !dbg !25 + ret i32 0, !dbg !26 +} + +declare i32 @printf(i8*, ...) 
+ +attributes #0 = { "use-sample-profile" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "calls.cc", directory: ".") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 1, !"Debug Info Version", i32 3} +!5 = !{!"clang version 3.5 "} +!6 = distinct !DISubprogram(name: "sum", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 4, scope: !6) +!9 = distinct !DISubprogram(name: "sub", scope: !1, file: !1, line: 20, type: !7, scopeLine: 20, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!10 = !DILocation(line: 20, scope: !9) +!11 = !DILocation(line: 21, scope: !9) +!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!13 = !DILocation(line: 8, scope: !12) +!14 = !DILocation(line: 9, scope: !12) +!15 = !DILocation(line: 9, scope: !16) +!16 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 2) +!17 = !DILocation(line: 10, scope: !18) +!18 = distinct !DILexicalBlock(scope: !12, file: !1, line: 10) +!19 = !DILocation(line: 10, scope: !20) +!20 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 2) +!21 = !DILocation(line: 10, scope: !22) +!22 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 4) +!23 = !DILocation(line: 10, scope: !24) +!24 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 6) +!25 = !DILocation(line: 11, scope: !12) +!26 = !DILocation(line: 12, scope: !12) + + +; DEFAULT: _Z3sumii inlined 
into main +; DEFAULT: _Z3subii inlined into _Z3sumii +; DEFAULT-NOT: _Z3subii inlined into main + +; REPLAY: _Z3sumii inlined into main +; REPLAY: _Z3subii inlined into main +; REPLAY-NOT: _Z3subii inlined into _Z3sumii