diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -462,6 +462,10 @@ // A pseudo probe helper to correlate the imported sample counts. std::unique_ptr ProbeManager; + + /// Map from promoted indirect calls to their pre-promotion distribution + /// factors. + std::unordered_map IndirectCallDistributions; }; class SampleProfileLoaderLegacyPass : public ModulePass { @@ -831,7 +835,10 @@ /// /// \param F Caller function. /// \param Candidate ICP and inline candidate. -/// \param Sum Sum of target counts for indirect call. +/// \param SumOrigin Original sum of target counts for indirect call before +/// promoting given candidate. +/// \param Sum Prorated sum of remaining target counts for indirect call +/// after promoting given candidate. /// \param InlinedCallSite Output vector for new call sites exposed after /// inlining. bool SampleProfileLoader::tryPromoteAndInlineCandidate( @@ -867,12 +874,12 @@ if (DI) { Sum -= Candidate.CallsiteCount; // Prorate the indirect callsite distribution. + setProbeDistributionFactor(CI, static_cast(Sum) / SumOrigin); // Do not update the promoted direct callsite distribution at this // point since the original distribution combined with the callee // profile will be used to prorate callsites from the callee if // inlined. Once not inlined, the direct callsite distribution should // be prorated so that the it will reflect the real callsite counts. - setProbeDistributionFactor(CI, static_cast(Sum) / SumOrigin); Candidate.CallInstr = DI; if (isa(DI) || isa(DI)) { bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite); @@ -1052,6 +1059,10 @@ continue; if (I->isIndirectCall()) { uint64_t Sum; + // Record original distribution for later use of prorating profile call + // target counts. 
+ float Factor = 1.0; + IndirectCallDistributions[I] = Candidate.CallsiteDistribution; for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) { uint64_t SumOrigin = Sum; if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { @@ -1062,7 +1073,7 @@ if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList)) continue; - Candidate = {I, FS, FS->getEntrySamples(), 1.0}; + Candidate = {I, FS, FS->getEntrySamples(), Factor}; if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) { LocalNotInlinedCallSites.erase(I); LocalChanged = true; @@ -1326,6 +1337,9 @@ auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum); uint64_t SumOrigin = Sum; Sum *= Candidate.CallsiteDistribution; + // Record original distribution for later use of prorating profile call + // target counts. + IndirectCallDistributions[I] = Candidate.CallsiteDistribution; for (const auto *FS : CalleeSamples) { // TODO: Consider disable pre-lTO ICP for MonoLTO as well if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { @@ -1431,12 +1445,14 @@ if (!T || T.get().empty()) continue; // Prorate the callsite counts to reflect what is already done to the - // callsite, such as ICP or calliste cloning. - if (FunctionSamples::ProfileIsProbeBased) { - if (Optional Probe = extractProbe(I)) { - if (Probe->Factor < 1) - T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor); - } + // callsite before ICP, such as callsite cloning. + if (Optional Probe = extractProbe(I)) { + auto Iter = IndirectCallDistributions.find(&I); + float Factor = (Iter == IndirectCallDistributions.end()) + ? 
Probe->Factor + : Iter->second; + if (Factor < 1) + T = SampleRecord::adjustCallTargets(T.get(), Factor); } SmallVector SortedCallTargets = GetSortedValueDataFromCallTargets(T.get()); diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-icp-factor.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-icp-factor.ll --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-icp-factor.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-icp-factor.ll @@ -197,6 +197,8 @@ !48 = !DILocation(line: 14, column: 10, scope: !35) !49 = !DILocation(line: 14, column: 12, scope: !35) !50 = !DILocation(line: 14, column: 10, scope: !51) +;; A discriminator of 108527639 which is 0x6780017 in hexadecimal, stands for an indirect call probe +;; with an index of 2 and probe factor of 0.79. !51 = !DILexicalBlockFile(scope: !35, file: !1, discriminator: 108527639) !52 = !DILocation(line: 14, column: 3, scope: !35) !53 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 17, type: !54, scopeLine: 18, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !56) @@ -231,6 +233,8 @@ !82 = !DILocation(line: 32, column: 17, scope: !75) !83 = !DILocation(line: 32, column: 20, scope: !75) !84 = !DILocation(line: 32, column: 13, scope: !85) +;; A discriminator of 116916311 which is 0x6f80057 in hexadecimal, stands for an indirect call probe +;; with an index of 10 and probe factor of 0.95. !85 = !DILexicalBlockFile(scope: !75, file: !1, discriminator: 116916311) !86 = !DILocation(line: 32, column: 11, scope: !75) !87 = !DILocation(line: 33, column: 5, scope: !75) @@ -244,9 +248,14 @@ !95 = !DILocation(line: 36, column: 1, scope: !53) !96 = !DILocation(line: 35, column: 5, scope: !53) -; CHECK: %[[#]] = call i32 (i32, ...) %30(i32 %[[#]]) #[[#]], !dbg ![[#DBGID:]], !prof ![[#]] +; CHECK: define dso_local i32 @main +; CHECK: %[[#]] = call i32 (i32, ...) 
%[[#]](i32 %[[#]]) #[[#]], !dbg ![[#DBGID:]], !prof ![[#PROF:]] ;; A discriminator of 69206039 which is 0x4200017 in hexdecimal, stands for an indirect call probe ;; with an index of 2 and probe factor of 0.04. ; CHECK: ![[#DBGID]] = !DILocation(line: [[#]], column: [[#]], scope: ![[#SCOPE:]], inlinedAt: ![[#]]) ; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 69206039) + +;; The remaining count of the second target (bar) should be from the original count multiplied by two callsite +;; factors, i.e., roughly 11259 * 0.95 * 0.79 = 8444. +; CHECK: ![[#PROF]] = !{!"VP", i32 0, i64 8444, i64 7546896869197086323, i64 -1, i64 -2012135647395072713, i64 8444}