diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -881,9 +881,11 @@ /// gives more accurate profile data and is preferred for standalone symbols. uint64_t getHeadSamples() const { return TotalHeadSamples; } - /// Return the sample count of the first instruction of the function. + /// Return an estimate of the sample count of the function entry basic block. /// The function can be either a standalone symbol or an inlined function. - uint64_t getEntrySamples() const { + /// For Context-Sensitive profiles, this will prefer returning the head + /// samples (i.e. getHeadSamples()), if non-zero. + uint64_t getEntryBBSampleCountEstimate() const { if (FunctionSamples::ProfileIsCS && getHeadSamples()) { // For CS profile, if we already have more accurate head samples // counted by branch sample from caller, use them as entry samples. @@ -900,7 +902,7 @@ // An indirect callsite may be promoted to several inlined direct calls. // We need to get the sum of them. for (const auto &N_FS : CallsiteSamples.begin()->second) - Count += N_FS.second.getEntrySamples(); + Count += N_FS.second.getEntryBBSampleCountEstimate(); } // Return at least 1 if total sample is not 0. return Count ? Count : TotalSamples > 0; diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h --- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h +++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h @@ -105,7 +105,8 @@ if (!CalleeSamples || !CallerSamples) { Weight = 0; } else { - uint64_t CalleeEntryCount = CalleeSamples->getEntrySamples(); + uint64_t CalleeEntryCount = + CalleeSamples->getEntryBBSampleCountEstimate(); uint64_t CallsiteCount = 0; LineLocation Callsite = Callee->getCallSiteLoc(); if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) { @@ -169,7 +170,7 @@ for (const auto &InlinedSamples : CallsiteSamples.second) { addProfiledFunction(InlinedSamples.first); addProfiledCall(Samples.getFuncName(), InlinedSamples.first, - InlinedSamples.second.getEntrySamples()); + InlinedSamples.second.getEntryBBSampleCountEstimate()); addProfiledCalls(InlinedSamples.second); } } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -687,8 +687,10 @@ auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) { assert(L && R && "Expect non-null FunctionSamples"); - if (L->getEntrySamples() != R->getEntrySamples()) - return L->getEntrySamples() > R->getEntrySamples(); + if (L->getEntryBBSampleCountEstimate() != + R->getEntryBBSampleCountEstimate()) + return L->getEntryBBSampleCountEstimate() > + R->getEntryBBSampleCountEstimate(); return FunctionSamples::getGUID(L->getName()) < FunctionSamples::getGUID(R->getName()); }; @@ -703,7 +705,7 @@ // as that already includes both inlined callee and non-inlined ones.. Sum = 0; for (const auto *const FS : CalleeSamples) { - Sum += FS->getEntrySamples(); + Sum += FS->getEntryBBSampleCountEstimate(); R.push_back(FS); } llvm::sort(R, FSCompare); @@ -724,7 +726,7 @@ if (M->empty()) return R; for (const auto &NameFS : *M) { - Sum += NameFS.second.getEntrySamples(); + Sum += NameFS.second.getEntryBBSampleCountEstimate(); R.push_back(&NameFS.second); } llvm::sort(R, FSCompare); @@ -1043,7 +1045,7 @@ bool PreInline = UsePreInlinerDecision && CalleeSample->getContext().hasAttribute(ContextShouldBeInlined); - if (!PreInline && CalleeSample->getEntrySamples() < Threshold) + if (!PreInline && CalleeSample->getEntryBBSampleCountEstimate() < Threshold) continue; StringRef Name = CalleeSample->getFuncName(); @@ -1124,7 +1126,8 @@ assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && "GUIDToFuncNameMap has to be populated"); AllCandidates.push_back(CB); - if (FS->getEntrySamples() > 0 || FunctionSamples::ProfileIsCS) + if (FS->getEntryBBSampleCountEstimate() > 0 || + FunctionSamples::ProfileIsCS) LocalNotInlinedCallSites.try_emplace(CB, FS); if (callsiteIsHot(FS, PSI, ProfAccForSymsInList)) Hot = true; @@ -1164,7 +1167,7 @@ if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList)) continue; - Candidate = {I, FS, FS->getEntrySamples(), 1.0}; + Candidate = {I, FS, FS->getEntryBBSampleCountEstimate(), 1.0}; if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) { LocalNotInlinedCallSites.erase(I); LocalChanged = true; @@ -1278,7 +1281,8 @@ Factor = Probe->Factor; uint64_t CallsiteCount = - CalleeSamples ? CalleeSamples->getEntrySamples() * Factor : 0; + CalleeSamples ? CalleeSamples->getEntryBBSampleCountEstimate() * Factor + : 0; *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor}; return true; } @@ -1433,8 +1437,8 @@ PSI->getOrCompHotCountThreshold()); continue; } - uint64_t EntryCountDistributed = - FS->getEntrySamples() * Candidate.CallsiteDistribution; + uint64_t EntryCountDistributed = FS->getEntryBBSampleCountEstimate() * + Candidate.CallsiteDistribution; // In addition to regular inline cost check, we also need to make sure // ICP isn't introducing excessive speculative checks even if individual // target looks beneficial to promote and inline. That means we should @@ -1521,7 +1525,8 @@ ++NumCSNotInlined; const FunctionSamples *FS = Pair.getSecond(); - if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) { + if (FS->getTotalSamples() == 0 && + FS->getEntryBBSampleCountEstimate() == 0) { continue; } @@ -1539,7 +1544,7 @@ // Use entry samples as head samples during the merge, as inlinees // don't have head samples. const_cast(FS)->addHeadSamples( - FS->getEntrySamples()); + FS->getEntryBBSampleCountEstimate()); // Note that we have to do the merge right after processing function. // This allows OutlineFS's profile to be used for annotation during @@ -1552,7 +1557,7 @@ } else { auto pair = notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0}); - pair.first->second.entryCount += FS->getEntrySamples(); + pair.first->second.entryCount += FS->getEntryBBSampleCountEstimate(); } } } @@ -1616,7 +1621,7 @@ if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) { for (const auto &NameFS : *M) - Sum += NameFS.second.getEntrySamples(); + Sum += NameFS.second.getEntryBBSampleCountEstimate(); } } if (Sum) diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -2471,9 +2471,10 @@ (ProfileTotalSample > 0) ? (Func.getTotalSamples() * 100.0) / ProfileTotalSample : 0; - PrintValues.emplace_back(HotFuncInfo( - Func.getContext().toString(), Func.getTotalSamples(), - TotalSamplePercent, FuncPair.second.second, Func.getEntrySamples())); + PrintValues.emplace_back( + HotFuncInfo(Func.getContext().toString(), Func.getTotalSamples(), + TotalSamplePercent, FuncPair.second.second, + Func.getEntryBBSampleCountEstimate())); } dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount, Profiles.size(), HotFuncSample, ProfileTotalSample, diff --git a/llvm/tools/llvm-profgen/CSPreInliner.cpp b/llvm/tools/llvm-profgen/CSPreInliner.cpp --- a/llvm/tools/llvm-profgen/CSPreInliner.cpp +++ b/llvm/tools/llvm-profgen/CSPreInliner.cpp @@ -117,7 +117,7 @@ // Call site count is more reliable, so we look up the corresponding call // target profile in caller's context profile to retrieve call site count. - uint64_t CalleeEntryCount = CalleeSamples->getEntrySamples(); + uint64_t CalleeEntryCount = CalleeSamples->getEntryBBSampleCountEstimate(); uint64_t CallsiteCount = 0; LineLocation Callsite = CalleeNode->getCallSiteLoc(); if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) { diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -930,7 +930,7 @@ FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode); // Since we don't have call count for inlined functions, we // estimate it from inlinee's profile using entry body sample. - uint64_t EstimatedCallCount = CalleeProfile->getEntrySamples(); + uint64_t EstimatedCallCount = CalleeProfile->getEntryBBSampleCountEstimate(); // If we don't have samples with location, use 1 to indicate live. if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size()) EstimatedCallCount = 1;