Index: include/llvm/Transforms/Instrumentation.h =================================================================== --- include/llvm/Transforms/Instrumentation.h +++ include/llvm/Transforms/Instrumentation.h @@ -88,6 +88,20 @@ createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef("")); ModulePass *createPGOIndirectCallPromotionLegacyPass(bool InLTO = false); +// Helper function that transforms Inst (either an indirect-call instruction or +// an invoke instruction) to a conditional call to F. This is like: +// if (Inst.CalledValue == F) +// F(...); +// else +// Inst(...); +// end +// TotalCount is the profile count of how often the instruction executes. +// Count is the profile count of how often F is the target function. +// These two values are used to update the branch weights. +// Returns the promoted direct call instruction. +Instruction *promoteIndirectCall(Instruction *Inst, Function *F, uint64_t Count, + uint64_t TotalCount); + /// Options for the frontend instrumentation based profiling pass. struct InstrProfOptions { // Add the 'noredzone' attribute to added runtime library calls. 
Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -52,6 +52,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/Cloning.h" #include @@ -162,7 +163,7 @@ ErrorOr getBlockWeight(const BasicBlock *BB); const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const; const FunctionSamples *findFunctionSamples(const Instruction &I) const; - bool inlineHotFunctions(Function &F); + bool inlineHotFunctions(Function &F, DenseSet PromotedInsns); void printEdgeWeight(raw_ostream &OS, Edge E); void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const; void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); @@ -621,7 +622,8 @@ /// \param F function to perform iterative inlining. /// /// \returns True if there is any inline happened. -bool SampleProfileLoader::inlineHotFunctions(Function &F) { +bool SampleProfileLoader::inlineHotFunctions( + Function &F, DenseSet PromotedInsns) { bool Changed = false; LLVMContext &Ctx = F.getContext(); std::function GetAssumptionCache = [&]( @@ -647,10 +649,19 @@ } for (auto I : CIS) { InlineFunctionInfo IFI(nullptr, ACT ? 
&GetAssumptionCache : nullptr); - CallSite CS(I); - Function *CalledFunction = CS.getCalledFunction(); + Function *CalledFunction = CallSite(I).getCalledFunction(); + Instruction *DI = I; + if (!CalledFunction && !PromotedInsns.count(I)) { + CalledFunction = F.getParent()->getFunction( + findCalleeFunctionSamples(*I)->getName()); + if (CalledFunction) { + DI = promoteIndirectCall(I, CalledFunction, 80, 100); + PromotedInsns.insert(I); + } + } if (!CalledFunction || !CalledFunction->getSubprogram()) continue; + CallSite CS(DI); if (InlineFunction(CS, IFI)) { DebugLoc DLoc = I->getDebugLoc(); uint64_t NumSamples = findCalleeFunctionSamples(*I)->getTotalSamples(); @@ -1268,7 +1279,8 @@ DEBUG(dbgs() << "Line number for the first instruction in " << F.getName() << ": " << getFunctionLoc(F) << "\n"); - Changed |= inlineHotFunctions(F); + DenseSet PromotedInsns; + Changed |= inlineHotFunctions(F, PromotedInsns); // Compute basic block weights. Changed |= computeBlockWeights(F); Index: lib/Transforms/Instrumentation/IndirectCallPromotion.cpp =================================================================== --- lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -172,19 +172,6 @@ Instruction *Inst, const ArrayRef &ValueDataRef, uint64_t TotalCount, uint32_t NumCandidates); - // Main function that transforms Inst (either a indirect-call instruction, or - // an invoke instruction , to a conditional call to F. This is like: - // if (Inst.CalledValue == F) - // F(...); - // else - // Inst(...); - // end - // TotalCount is the profile count value that the instruction executes. - // Count is the profile count value that F is the target function. - // These two values are being used to update the branch weight. - void promote(Instruction *Inst, Function *F, uint64_t Count, - uint64_t TotalCount); - // Promote a list of targets for one indirect-call callsite. Return // the number of promotions. 
uint32_t tryToPromote(Instruction *Inst, @@ -532,8 +519,10 @@ // Ret = phi(Ret1, Ret2); // It adds type casts for the args do not match the parameters and the return // value. Branch weights metadata also updated. -void ICallPromotionFunc::promote(Instruction *Inst, Function *DirectCallee, - uint64_t Count, uint64_t TotalCount) { +// Returns the promoted direct call instruction. +Instruction *llvm::promoteIndirectCall(Instruction *Inst, + Function *DirectCallee, uint64_t Count, + uint64_t TotalCount) { assert(DirectCallee != nullptr); BasicBlock *BB = Inst->getParent(); // Just to suppress the non-debug build warning. @@ -576,9 +565,10 @@ DEBUG(dbgs() << *BB << *DirectCallBB << *IndirectCallBB << *MergeBB << "\n"); emitOptimizationRemark( - F.getContext(), "pgo-icall-prom", F, Inst->getDebugLoc(), + BB->getContext(), "pgo-icall-prom", *BB->getParent(), Inst->getDebugLoc(), Twine("Promote indirect call to ") + DirectCallee->getName() + " with count " + Twine(Count) + " out of " + Twine(TotalCount)); + return NewInst; } // Promote indirect-call to conditional direct-call for one callsite. 
@@ -589,7 +579,7 @@ for (auto &C : Candidates) { uint64_t Count = C.Count; - promote(Inst, C.TargetFunction, Count, TotalCount); + promoteIndirectCall(Inst, C.TargetFunction, Count, TotalCount); assert(TotalCount >= Count); TotalCount -= Count; NumOfPGOICallPromotion++; Index: test/Transforms/SampleProfile/Inputs/indirect-call.prof =================================================================== --- test/Transforms/SampleProfile/Inputs/indirect-call.prof +++ test/Transforms/SampleProfile/Inputs/indirect-call.prof @@ -1,2 +1,8 @@ test:63067:0 4: 3345 _Z3barv:1398 _Z3foov:2059 +test_inline:3000:0 + 5: foo_inline:3000 + 1: 3000 +test_noinline:3000:0 + 5: foo_noinline:3000 + 1: 3000 Index: test/Transforms/SampleProfile/indirect-call.ll =================================================================== --- test/Transforms/SampleProfile/indirect-call.ll +++ test/Transforms/SampleProfile/indirect-call.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call.prof -S | FileCheck %s +; CHECK-LABEL: @test define void @test(void ()*) !dbg !3 { %2 = alloca void ()* store void ()* %0, void ()** %2 @@ -9,6 +10,37 @@ ret void } +; CHECK-LABEL: @test_inline +; If the indirect call is promoted and inlined in profile, we should promote and inline it. +define void @test_inline(void ()*) !dbg !3 { + %2 = alloca void ()* + store void ()* %0, void ()** %2 + %3 = load void ()*, void ()** %2 +; CHECK: icmp {{.*}} @foo_inline +; CHECK: if.true.direct_targ: +; CHECK-NOT: call +; CHECK: if.false.orig_indirect: +; CHECK: call + call void %3(), !dbg !5 + ret void +} + +; CHECK-LABEL: @test_noinline +; If the indirect call target is not available, we should not promote it. 
+define void @test_noinline(void ()*) !dbg !3 { + %2 = alloca void ()* + store void ()* %0, void ()** %2 + %3 = load void ()*, void ()** %2 +; CHECK-NOT: icmp +; CHECK: call + call void %3(), !dbg !5 + ret void +} + +define void @foo_inline() !dbg !3 { + ret void +} + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2} @@ -17,4 +49,5 @@ !2 = !{i32 2, !"Debug Info Version", i32 3} !3 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, unit: !0) !4 = !DILocation(line: 5, scope: !3) +!5 = !DILocation(line: 6, scope: !3) ; CHECK: ![[PROF]] = !{!"VP", i32 0, i64 3457, i64 9191153033785521275, i64 2059, i64 -1069303473483922844, i64 1398}