Index: lib/Transforms/IPO/SampleProfile.cpp
===================================================================
--- lib/Transforms/IPO/SampleProfile.cpp
+++ lib/Transforms/IPO/SampleProfile.cpp
@@ -52,6 +52,7 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include <cctype>
 
@@ -614,14 +615,14 @@
 /// Iteratively traverse all callsites of the function \p F, and find if
 /// the corresponding inlined instance exists and is hot in profile. If
 /// it is hot enough, inline the callsites and adds new callsites of the
-/// callee into the caller.
-///
-/// TODO: investigate the possibility of not invoking InlineFunction directly.
+/// callee into the caller. If the call is an indirect call, first promote
+/// it to a direct call. Each indirect call is limited to a single target.
 ///
 /// \param F function to perform iterative inlining.
 ///
 /// \returns True if there is any inline happened.
 bool SampleProfileLoader::inlineHotFunctions(Function &F) {
+  DenseSet<Instruction *> PromotedInsns;
   bool Changed = false;
   LLVMContext &Ctx = F.getContext();
   std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&](
@@ -647,10 +648,27 @@
     }
     for (auto I : CIS) {
       InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr);
-      CallSite CS(I);
-      Function *CalledFunction = CS.getCalledFunction();
+      Function *CalledFunction = CallSite(I).getCalledFunction();
+      Instruction *DI = I;
+      if (!CalledFunction && !PromotedInsns.count(I)) {
+        auto CalleeFunctionName = findCalleeFunctionSamples(*I)->getName();
+        const char *Reason = "Callee function not available";
+        CalledFunction = F.getParent()->getFunction(CalleeFunctionName);
+        if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) {
+          // The indirect target was promoted and inlined in the profile, as a
+          // result, we do not have profile info for the branch probability.
+          // We set the probability to 80% taken to indicate that the static
+          // call is likely taken.
+          DI = promoteIndirectCall(I, CalledFunction, 80, 100);
+          PromotedInsns.insert(I);
+        } else {
+          DEBUG(dbgs() << "\nFailed to promote indirect call to "
+                       << CalleeFunctionName << " because " << Reason << "\n");
+        }
+      }
       if (!CalledFunction || !CalledFunction->getSubprogram())
         continue;
+      CallSite CS(DI);
       DebugLoc DLoc = I->getDebugLoc();
       uint64_t NumSamples = findCalleeFunctionSamples(*I)->getTotalSamples();
       if (InlineFunction(CS, IFI)) {
Index: test/Transforms/SampleProfile/Inputs/indirect-call.prof
===================================================================
--- test/Transforms/SampleProfile/Inputs/indirect-call.prof
+++ test/Transforms/SampleProfile/Inputs/indirect-call.prof
@@ -1,2 +1,8 @@
 test:63067:0
  4: 3345 _Z3barv:1398 _Z3foov:2059
+test_inline:3000:0
+ 5: foo_inline:3000
+  1: 3000
+test_noinline:3000:0
+ 5: foo_noinline:3000
+  1: 3000
Index: test/Transforms/SampleProfile/indirect-call.ll
===================================================================
--- test/Transforms/SampleProfile/indirect-call.ll
+++ test/Transforms/SampleProfile/indirect-call.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call.prof -S | FileCheck %s
 
+; CHECK-LABEL: @test
 define void @test(void ()*) !dbg !3 {
   %2 = alloca void ()*
   store void ()* %0, void ()** %2
@@ -9,6 +10,41 @@
   ret void
 }
 
+; CHECK-LABEL: @test_inline
+; If the indirect call is promoted and inlined in profile, we should promote and inline it.
+define void @test_inline(void ()*) !dbg !3 {
+  %2 = alloca void ()*
+  store void ()* %0, void ()** %2
+  %3 = load void ()*, void ()** %2
+; CHECK: icmp {{.*}} @foo_inline
+; CHECK: if.true.direct_targ:
+; CHECK-NOT: call
+; CHECK: if.false.orig_indirect:
+; CHECK: call
+  call void %3(), !dbg !5
+  ret void
+}
+
+; CHECK-LABEL: @test_noinline
+; If the indirect call target is not available, we should not promote it.
+define void @test_noinline(void ()*) !dbg !3 {
+  %2 = alloca void ()*
+  store void ()* %0, void ()** %2
+  %3 = load void ()*, void ()** %2
+; CHECK-NOT: icmp
+; CHECK: call
+  call void %3(), !dbg !5
+  ret void
+}
+
+define void @foo_inline() !dbg !3 {
+  ret void
+}
+
+define i32 @foo_noinline(i32 %x) !dbg !3 {
+  ret i32 %x
+}
+
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!2}
 
@@ -17,4 +53,5 @@
 !2 = !{i32 2, !"Debug Info Version", i32 3}
 !3 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, unit: !0)
 !4 = !DILocation(line: 5, scope: !3)
+!5 = !DILocation(line: 6, scope: !3)
 ; CHECK: ![[PROF]] = !{!"VP", i32 0, i64 3457, i64 9191153033785521275, i64 2059, i64 -1069303473483922844, i64 1398}