Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -52,6 +52,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/Cloning.h" #include @@ -162,7 +163,7 @@ ErrorOr getBlockWeight(const BasicBlock *BB); const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const; const FunctionSamples *findFunctionSamples(const Instruction &I) const; - bool inlineHotFunctions(Function &F); + bool inlineHotFunctions(Function &F, DenseSet PromotedInsns); void printEdgeWeight(raw_ostream &OS, Edge E); void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const; void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); @@ -614,14 +615,16 @@ /// Iteratively traverse all callsites of the function \p F, and find if /// the corresponding inlined instance exists and is hot in profile. If /// it is hot enough, inline the callsites and adds new callsites of the -/// callee into the caller. -/// -/// TODO: investigate the possibility of not invoking InlineFunction directly. +/// callee into the caller. If the call is an indirect call, first promote +/// it to a direct call. Each indirect call should only be promoted once. /// /// \param F function to perform iterative inlining. +/// \param PromotedInsns a set of indirect call instructions that have +/// already been promoted to direct calls. /// /// \returns True if there is any inline happened. -bool SampleProfileLoader::inlineHotFunctions(Function &F) { +bool SampleProfileLoader::inlineHotFunctions( + Function &F, DenseSet PromotedInsns) { bool Changed = false; LLVMContext &Ctx = F.getContext(); std::function GetAssumptionCache = [&]( @@ -647,10 +650,19 @@ } for (auto I : CIS) { InlineFunctionInfo IFI(nullptr, ACT ? 
&GetAssumptionCache : nullptr); - CallSite CS(I); - Function *CalledFunction = CS.getCalledFunction(); + Function *CalledFunction = CallSite(I).getCalledFunction(); + Instruction *DI = I; + if (!CalledFunction && !PromotedInsns.count(I)) { + CalledFunction = F.getParent()->getFunction( + findCalleeFunctionSamples(*I)->getName()); + if (CalledFunction) { + DI = promoteIndirectCall(I, CalledFunction, 80, 100); + PromotedInsns.insert(I); + } + } if (!CalledFunction || !CalledFunction->getSubprogram()) continue; + CallSite CS(DI); DebugLoc DLoc = I->getDebugLoc(); uint64_t NumSamples = findCalleeFunctionSamples(*I)->getTotalSamples(); if (InlineFunction(CS, IFI)) { @@ -1268,7 +1280,8 @@ DEBUG(dbgs() << "Line number for the first instruction in " << F.getName() << ": " << getFunctionLoc(F) << "\n"); - Changed |= inlineHotFunctions(F); + DenseSet PromotedInsns; + Changed |= inlineHotFunctions(F, PromotedInsns); // Compute basic block weights. Changed |= computeBlockWeights(F); Index: test/Transforms/SampleProfile/Inputs/indirect-call.prof =================================================================== --- test/Transforms/SampleProfile/Inputs/indirect-call.prof +++ test/Transforms/SampleProfile/Inputs/indirect-call.prof @@ -1,2 +1,8 @@ test:63067:0 4: 3345 _Z3barv:1398 _Z3foov:2059 +test_inline:3000:0 + 5: foo_inline:3000 + 1: 3000 +test_noinline:3000:0 + 5: foo_noinline:3000 + 1: 3000 Index: test/Transforms/SampleProfile/indirect-call.ll =================================================================== --- test/Transforms/SampleProfile/indirect-call.ll +++ test/Transforms/SampleProfile/indirect-call.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call.prof -S | FileCheck %s +; CHECK-LABEL: @test define void @test(void ()*) !dbg !3 { %2 = alloca void ()* store void ()* %0, void ()** %2 @@ -9,6 +10,37 @@ ret void } +; CHECK-LABEL: @test_inline +; If the indirect call is promoted and inlined in profile, we should promote and 
inline it. +define void @test_inline(void ()*) !dbg !3 { + %2 = alloca void ()* + store void ()* %0, void ()** %2 + %3 = load void ()*, void ()** %2 +; CHECK: icmp {{.*}} @foo_inline +; CHECK: if.true.direct_targ: +; CHECK-NOT: call +; CHECK: if.false.orig_indirect: +; CHECK: call + call void %3(), !dbg !5 + ret void +} + +; CHECK-LABEL: @test_noinline +; If the indirect call target is not available, we should not promote it. +define void @test_noinline(void ()*) !dbg !3 { + %2 = alloca void ()* + store void ()* %0, void ()** %2 + %3 = load void ()*, void ()** %2 +; CHECK-NOT: icmp +; CHECK: call + call void %3(), !dbg !5 + ret void +} + +define void @foo_inline() !dbg !3 { + ret void +} + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2} @@ -17,4 +49,5 @@ !2 = !{i32 2, !"Debug Info Version", i32 3} !3 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, unit: !0) !4 = !DILocation(line: 5, scope: !3) +!5 = !DILocation(line: 6, scope: !3) ; CHECK: ![[PROF]] = !{!"VP", i32 0, i64 3457, i64 9191153033785521275, i64 2059, i64 -1069303473483922844, i64 1398}