diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -874,16 +874,20 @@ /// Return the total number of samples collected inside the function. uint64_t getTotalSamples() const { return TotalSamples; } - /// Return the total number of branch samples that have the function as the - /// branch target. This should be equivalent to the sample of the first - /// instruction of the symbol. But as we directly get this info for raw - /// profile without referring to potentially inaccurate debug info, this + /// For top-level functions, return the total number of branch samples that + /// have the function as the branch target (or 0 otherwise). This is the raw + /// data fetched from the profile. This should be equivalent to the sample of + /// the first instruction of the symbol. But as we directly get this info for + /// raw profile without referring to potentially inaccurate debug info, this /// gives more accurate profile data and is preferred for standalone symbols. uint64_t getHeadSamples() const { return TotalHeadSamples; } - /// Return the sample count of the first instruction of the function. + /// Return an estimate of the sample count of the function entry basic block. /// The function can be either a standalone symbol or an inlined function. - uint64_t getEntrySamples() const { + /// For Context-Sensitive profiles, this will prefer returning the head + /// samples (i.e. getHeadSamples()), if non-zero. Otherwise it estimates from + /// the function body's samples or callsite samples. + uint64_t getHeadSamplesEstimate() const { if (FunctionSamples::ProfileIsCS && getHeadSamples()) { // For CS profile, if we already have more accurate head samples // counted by branch sample from caller, use them as entry samples. @@ -900,7 +904,7 @@ // An indirect callsite may be promoted to several inlined direct calls. 
// We need to get the sum of them. for (const auto &N_FS : CallsiteSamples.begin()->second) - Count += N_FS.second.getEntrySamples(); + Count += N_FS.second.getHeadSamplesEstimate(); } // Return at least 1 if total sample is not 0. return Count ? Count : TotalSamples > 0; diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h --- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h +++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h @@ -105,7 +105,7 @@ if (!CalleeSamples || !CallerSamples) { Weight = 0; } else { - uint64_t CalleeEntryCount = CalleeSamples->getEntrySamples(); + uint64_t CalleeEntryCount = CalleeSamples->getHeadSamplesEstimate(); uint64_t CallsiteCount = 0; LineLocation Callsite = Callee->getCallSiteLoc(); if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) { @@ -169,7 +169,7 @@ for (const auto &InlinedSamples : CallsiteSamples.second) { addProfiledFunction(InlinedSamples.first); addProfiledCall(Samples.getFuncName(), InlinedSamples.first, - InlinedSamples.second.getEntrySamples()); + InlinedSamples.second.getHeadSamplesEstimate()); addProfiledCalls(InlinedSamples.second); } } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -687,8 +687,8 @@ auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) { assert(L && R && "Expect non-null FunctionSamples"); - if (L->getEntrySamples() != R->getEntrySamples()) - return L->getEntrySamples() > R->getEntrySamples(); + if (L->getHeadSamplesEstimate() != R->getHeadSamplesEstimate()) + return L->getHeadSamplesEstimate() > R->getHeadSamplesEstimate(); return FunctionSamples::getGUID(L->getName()) < FunctionSamples::getGUID(R->getName()); }; @@ -703,7 +703,7 @@ // as that already includes both inlined callee and non-inlined ones.. 
Sum = 0; for (const auto *const FS : CalleeSamples) { - Sum += FS->getEntrySamples(); + Sum += FS->getHeadSamplesEstimate(); R.push_back(FS); } llvm::sort(R, FSCompare); @@ -724,7 +724,7 @@ if (M->empty()) return R; for (const auto &NameFS : *M) { - Sum += NameFS.second.getEntrySamples(); + Sum += NameFS.second.getHeadSamplesEstimate(); R.push_back(&NameFS.second); } llvm::sort(R, FSCompare); @@ -1043,7 +1043,7 @@ bool PreInline = UsePreInlinerDecision && CalleeSample->getContext().hasAttribute(ContextShouldBeInlined); - if (!PreInline && CalleeSample->getEntrySamples() < Threshold) + if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold) continue; StringRef Name = CalleeSample->getFuncName(); @@ -1124,7 +1124,8 @@ assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && "GUIDToFuncNameMap has to be populated"); AllCandidates.push_back(CB); - if (FS->getEntrySamples() > 0 || FunctionSamples::ProfileIsCS) + if (FS->getHeadSamplesEstimate() > 0 || + FunctionSamples::ProfileIsCS) LocalNotInlinedCallSites.try_emplace(CB, FS); if (callsiteIsHot(FS, PSI, ProfAccForSymsInList)) Hot = true; @@ -1164,7 +1165,7 @@ if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList)) continue; - Candidate = {I, FS, FS->getEntrySamples(), 1.0}; + Candidate = {I, FS, FS->getHeadSamplesEstimate(), 1.0}; if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) { LocalNotInlinedCallSites.erase(I); LocalChanged = true; @@ -1278,7 +1279,7 @@ Factor = Probe->Factor; uint64_t CallsiteCount = - CalleeSamples ? CalleeSamples->getEntrySamples() * Factor : 0; + CalleeSamples ? 
CalleeSamples->getHeadSamplesEstimate() * Factor : 0; *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor}; return true; } @@ -1434,7 +1435,7 @@ continue; } uint64_t EntryCountDistributed = - FS->getEntrySamples() * Candidate.CallsiteDistribution; + FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution; // In addition to regular inline cost check, we also need to make sure // ICP isn't introducing excessive speculative checks even if individual // target looks beneficial to promote and inline. That means we should @@ -1521,7 +1522,7 @@ ++NumCSNotInlined; const FunctionSamples *FS = Pair.getSecond(); - if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) { + if (FS->getTotalSamples() == 0 && FS->getHeadSamplesEstimate() == 0) { continue; } @@ -1539,7 +1540,7 @@ // Use entry samples as head samples during the merge, as inlinees // don't have head samples. const_cast<FunctionSamples *>(FS)->addHeadSamples( - FS->getEntrySamples()); + FS->getHeadSamplesEstimate()); // Note that we have to do the merge right after processing function. // This allows OutlineFS's profile to be used for annotation during @@ -1552,7 +1553,7 @@ } else { auto pair = notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0}); - pair.first->second.entryCount += FS->getEntrySamples(); + pair.first->second.entryCount += FS->getHeadSamplesEstimate(); } } } @@ -1616,7 +1617,7 @@ if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) { for (const auto &NameFS : *M) - Sum += NameFS.second.getEntrySamples(); + Sum += NameFS.second.getHeadSamplesEstimate(); } } if (Sum) diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -2471,9 +2471,10 @@ (ProfileTotalSample > 0) ?
(Func.getTotalSamples() * 100.0) / ProfileTotalSample : 0; - PrintValues.emplace_back(HotFuncInfo( - Func.getContext().toString(), Func.getTotalSamples(), - TotalSamplePercent, FuncPair.second.second, Func.getEntrySamples())); + PrintValues.emplace_back( + HotFuncInfo(Func.getContext().toString(), Func.getTotalSamples(), + TotalSamplePercent, FuncPair.second.second, + Func.getHeadSamplesEstimate())); } dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount, Profiles.size(), HotFuncSample, ProfileTotalSample, diff --git a/llvm/tools/llvm-profgen/CSPreInliner.cpp b/llvm/tools/llvm-profgen/CSPreInliner.cpp --- a/llvm/tools/llvm-profgen/CSPreInliner.cpp +++ b/llvm/tools/llvm-profgen/CSPreInliner.cpp @@ -117,7 +117,7 @@ // Call site count is more reliable, so we look up the corresponding call // target profile in caller's context profile to retrieve call site count. - uint64_t CalleeEntryCount = CalleeSamples->getEntrySamples(); + uint64_t CalleeEntryCount = CalleeSamples->getHeadSamplesEstimate(); uint64_t CallsiteCount = 0; LineLocation Callsite = CalleeNode->getCallSiteLoc(); if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) { diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -930,7 +930,7 @@ FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode); // Since we don't have call count for inlined functions, we // estimate it from inlinee's profile using entry body sample. - uint64_t EstimatedCallCount = CalleeProfile->getEntrySamples(); + uint64_t EstimatedCallCount = CalleeProfile->getHeadSamplesEstimate(); // If we don't have samples with location, use 1 to indicate live. if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size()) EstimatedCallCount = 1;