Index: include/llvm/IR/CallSite.h =================================================================== --- include/llvm/IR/CallSite.h +++ include/llvm/IR/CallSite.h @@ -110,12 +110,12 @@ /// Return true if the callsite is an indirect call. bool isIndirectCall() const { - Value *V = getCalledValue(); + const Value *V = getCalledValue(); if (!V) return false; if (isa(V) || isa(V)) return false; - if (CallInst *CI = dyn_cast(getInstruction())) { + if (const CallInst *CI = dyn_cast(getInstruction())) { if (CI->isInlineAsm()) return false; } Index: include/llvm/ProfileData/SampleProf.h =================================================================== --- include/llvm/ProfileData/SampleProf.h +++ include/llvm/ProfileData/SampleProf.h @@ -296,10 +296,29 @@ /// Return the total number of samples collected inside the function. uint64_t getTotalSamples() const { return TotalSamples; } - /// Return the total number of samples collected at the head of the - /// function. + /// Return the total number of branch samples that have the function as the + /// branch target. This should be equivalent to the sample of the first + /// instruction of the symbol. But as we directly get this info for raw + /// profile without referring to potentially inaccurate debug info, this + /// gives more accurate profile data and is preferred for standalone symbols. uint64_t getHeadSamples() const { return TotalHeadSamples; } + /// Return the sample count of the first instruction of the function. + /// The function can be either a standalone symbol or an inlined function. 
+ uint64_t getEntrySamples() const { + if (!BodySamples.empty() && + (CallsiteSamples.empty() || + BodySamples.begin()->first < CallsiteSamples.begin()->first)) + return BodySamples.begin()->second.getSamples(); + if (!CallsiteSamples.empty()) { + uint64_t T = 0; + for (const auto &N_FS : CallsiteSamples.begin()->second) + T += N_FS.second.getEntrySamples(); + return T; + } + return 0; + } + /// Return all the samples collected in the body of the function. const BodySampleMap &getBodySamples() const { return BodySamples; } Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -171,7 +171,7 @@ ErrorOr getBlockWeight(const BasicBlock *BB); const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const; std::vector - findIndirectCallFunctionSamples(const Instruction &I) const; + findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const; const FunctionSamples *findFunctionSamples(const Instruction &I) const; bool inlineCallInstruction(Instruction *I); bool inlineHotFunctions(Function &F, @@ -515,6 +515,7 @@ // it means that the inlined callsite has no sample, thus the call // instruction should have 0 count. if ((isa(Inst) || isa(Inst)) && + !ImmutableCallSite(&Inst).isIndirectCall() && findCalleeFunctionSamples(Inst)) return 0; @@ -623,10 +624,11 @@ } /// Returns a vector of FunctionSamples that are the indirect call targets -/// of \p Inst. The vector is sorted by the total number of samples. +/// of \p Inst. The vector is sorted by the total number of samples. Stores +/// the total call count of the indirect call in \p Sum. 
std::vector SampleProfileLoader::findIndirectCallFunctionSamples( - const Instruction &Inst) const { + const Instruction &Inst, uint64_t &Sum) const { const DILocation *DIL = Inst.getDebugLoc(); std::vector R; @@ -638,16 +640,25 @@ if (FS == nullptr) return R; + uint32_t LineOffset = getOffset(DIL); + uint32_t Discriminator = DIL->getBaseDiscriminator(); + + auto T = FS->findCallTargetMapAt(LineOffset, Discriminator); + Sum = 0; + if (T) + for (const auto &T_C : T.get()) + Sum += T_C.second; if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt( LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()))) { if (M->size() == 0) return R; for (const auto &NameFS : *M) { + Sum += NameFS.second.getEntrySamples(); R.push_back(&NameFS.second); } std::sort(R.begin(), R.end(), [](const FunctionSamples *L, const FunctionSamples *R) { - return L->getTotalSamples() > R->getTotalSamples(); + return L->getEntrySamples() > R->getEntrySamples(); }); } return R; @@ -762,7 +773,8 @@ if (CallSite(I).isIndirectCall()) { if (PromotedInsns.count(I)) continue; - for (const auto *FS : findIndirectCallFunctionSamples(*I)) { + uint64_t Sum; + for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) { if (IsThinLTOPreLink) { FS->findImportedFunctions(ImportGUIDs, F.getParent(), Samples->getTotalSamples() * @@ -784,13 +796,11 @@ !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() && isLegalToPromote(I, R->getValue(), &Reason)) { - // The indirect target was promoted and inlined in the profile, - // as a result, we do not have profile info for the branch - // probability. We set the probability to 80% taken to indicate - // that the static call is likely taken. 
+ uint64_t C = FS->getEntrySamples(); Instruction *DI = dyn_cast( - promoteIndirectCall(I, R->getValue(), 80, 100, false, ORE) + promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE) ->stripPointerCasts()); + Sum -= C; PromotedInsns.insert(I); // If profile mismatches, we should not attempt to inline DI. if ((isa(DI) || isa(DI)) && @@ -1145,24 +1155,20 @@ } } -/// Sorts the CallTargetMap \p M by count in descending order and stores the -/// sorted result in \p Sorted. Returns the total counts. -static uint64_t SortCallTargets(SmallVector &Sorted, - const SampleRecord::CallTargetMap &M) { - Sorted.clear(); - uint64_t Sum = 0; - for (auto I = M.begin(); I != M.end(); ++I) { - Sum += I->getValue(); - Sorted.push_back({Function::getGUID(I->getKey()), I->getValue()}); - } - std::sort(Sorted.begin(), Sorted.end(), +/// Returns the sorted CallTargetMap \p M by count in descending order. +static SmallVector SortCallTargets( + const SampleRecord::CallTargetMap &M) { + SmallVector R; + for (auto I = M.begin(); I != M.end(); ++I) + R.push_back({Function::getGUID(I->getKey()), I->getValue()}); + std::sort(R.begin(), R.end(), [](const InstrProfValueData &L, const InstrProfValueData &R) { if (L.Count == R.Count) return L.Value > R.Value; else return L.Count > R.Count; }); - return Sum; + return R; } /// \brief Propagate weights into edges @@ -1255,8 +1261,10 @@ auto T = FS->findCallTargetMapAt(LineOffset, Discriminator); if (!T || T.get().size() == 0) continue; - SmallVector SortedCallTargets; - uint64_t Sum = SortCallTargets(SortedCallTargets, T.get()); + SmallVector SortedCallTargets = + SortCallTargets(T.get()); + uint64_t Sum; + findIndirectCallFunctionSamples(I, Sum); annotateValueSite(*I.getParent()->getParent()->getParent(), I, SortedCallTargets, Sum, IPVK_IndirectCallTarget, SortedCallTargets.size()); Index: test/Transforms/SampleProfile/Inputs/indirect-call.prof =================================================================== --- 
test/Transforms/SampleProfile/Inputs/indirect-call.prof +++ test/Transforms/SampleProfile/Inputs/indirect-call.prof @@ -1,6 +1,7 @@ test:63067:0 1: 3345 _Z3barv:1398 _Z3foov:2059 test_inline:3000:0 + 1: 1000 foo_inline3:1000 1: foo_inline1:3000 11: 3000 1: foo_inline2:4000 Index: test/Transforms/SampleProfile/indirect-call.ll =================================================================== --- test/Transforms/SampleProfile/indirect-call.ll +++ test/Transforms/SampleProfile/indirect-call.ll @@ -17,14 +17,16 @@ store i64* (i32*)* %0, i64* (i32*)** %2 %3 = load i64* (i32*)*, i64* (i32*)** %2 ; CHECK: icmp {{.*}} @foo_inline2 +; CHECK: br {{.*}} !prof ![[BR1:[0-9]+]] ; CHECK: if.true.direct_targ: ; CHECK-NOT: call ; CHECK: if.false.orig_indirect: ; CHECK: icmp {{.*}} @foo_inline1 +; CHECK: br {{.*}} !prof ![[BR2:[0-9]+]] ; CHECK: if.true.direct_targ1: ; CHECK-NOT: call ; CHECK: if.false.orig_indirect2: -; CHECK: call +; CHECK: call {{.*}} !prof ![[VP:[0-9]+]] call i64* %3(i32* %x), !dbg !7 ret void } @@ -152,6 +154,9 @@ !4 = !DILocation(line: 4, scope: !3) !5 = !DILocation(line: 6, scope: !3) ; CHECK: ![[PROF]] = !{!"VP", i32 0, i64 3457, i64 9191153033785521275, i64 2059, i64 -1069303473483922844, i64 1398} +; CHECK: ![[BR1]] = !{!"branch_weights", i32 4000, i32 4000} +; CHECK: ![[BR2]] = !{!"branch_weights", i32 3000, i32 1000} +; CHECK: ![[VP]] = !{!"VP", i32 0, i64 8000, i64 -6391416044382067764, i64 1000} !6 = distinct !DISubprogram(name: "test_inline", scope: !1, file: !1, line: 6, unit: !0) !7 = !DILocation(line: 7, scope: !6) !8 = distinct !DISubprogram(name: "test_inline_strip", scope: !1, file: !1, line: 8, unit: !0)