diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h --- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -102,12 +102,19 @@ } } - const PseudoProbeDescriptor *getDesc(const Function &F) const { - auto I = GUIDToProbeDescMap.find( - Function::getGUID(FunctionSamples::getCanonicalFnName(F))); + const PseudoProbeDescriptor *getDesc(uint64_t GUID) const { + auto I = GUIDToProbeDescMap.find(GUID); return I == GUIDToProbeDescMap.end() ? nullptr : &I->second; } + const PseudoProbeDescriptor *getDesc(StringRef FProfileName) const { + return getDesc(Function::getGUID(FProfileName)); + } + + const PseudoProbeDescriptor *getDesc(const Function &F) const { + return getDesc(Function::getGUID(FunctionSamples::getCanonicalFnName(F))); + } + bool moduleIsProbed(const Module &M) const { return M.getNamedMetadata(PseudoProbeDescMetadataName); } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -142,11 +142,6 @@ cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section).")); -static cl::opt FlattenProfileForMatching( - "flatten-profile-for-matching", cl::Hidden, cl::init(true), - cl::desc( - "Use flattened profile for stale profile detection and matching.")); - static cl::opt ProfileSampleAccurate( "profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " @@ -461,12 +456,7 @@ public: SampleProfileMatcher(Module &M, SampleProfileReader &Reader, const PseudoProbeManager *ProbeManager) - : M(M), Reader(Reader), ProbeManager(ProbeManager) { - if (FlattenProfileForMatching) { - ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles, - FunctionSamples::ProfileIsCS); - } - } + : M(M), Reader(Reader), ProbeManager(ProbeManager){}; void runOnModule(); private: @@ -477,7 +467,9 @@ return &It->second; return nullptr; } - void runOnFunction(const Function &F, const FunctionSamples &FS); + void runOnFunction(const Function &F); + void computeHashMismatchSamples(const FunctionSamples &FS, + uint64_t &MismatchedSamples); void countProfileMismatches( const FunctionSamples &FS, const std::unordered_set @@ -492,7 +484,7 @@ void distributeIRToProfileLocationMap(); void distributeIRToProfileLocationMap(FunctionSamples &FS); void populateIRLocations( - const Function &F, const FunctionSamples &FS, + const Function &F, const FunctionSamples *FSForReporting, std::unordered_set &MatchedCallsiteLocs, std::map &IRLocations); void populateProfileCallsites( @@ -2115,7 +2107,7 @@ } void SampleProfileMatcher::populateIRLocations( - const Function &F, const FunctionSamples &FS, + const Function &F, const FunctionSamples *FSForReporting, std::unordered_set &MatchedCallsiteLocs, std::map &IRLocations) { for (auto &BB : F) { @@ -2175,10 +2167,14 @@ "Overwrite non-call or different callee name location for " "pseudo probe callsite"); + if (!FSForReporting) + continue; + // Go through all the callsites on the IR and flag the callsite if the // target name is the same as the one in the profile. - const auto CTM = FS.findCallTargetMapAt(IRCallsite); - const auto CallsiteFS = FS.findFunctionSamplesMapAt(IRCallsite); + const auto CTM = FSForReporting->findCallTargetMapAt(IRCallsite); + const auto CallsiteFS = + FSForReporting->findFunctionSamplesMapAt(IRCallsite); // Indirect call case. if (CalleeName.empty()) { @@ -2357,34 +2353,55 @@ } } -void SampleProfileMatcher::runOnFunction(const Function &F, - const FunctionSamples &FS) { - bool IsFuncHashMismatch = false; - if (FunctionSamples::ProfileIsProbeBased) { - uint64_t Count = FS.getTotalSamples(); - TotalFuncHashSamples += Count; - TotalProfiledFunc++; - const auto *FuncDesc = ProbeManager->getDesc(F); - if (FuncDesc && ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) { - MismatchedFuncHashSamples += Count; - NumMismatchedFuncHash++; - IsFuncHashMismatch = true; - } +void SampleProfileMatcher::computeHashMismatchSamples( + const FunctionSamples &FS, uint64_t &MismatchedSamples) { + const auto *FuncDesc = ProbeManager->getDesc(FS.getName()); + // Skip the function that is external or renamed. + if (!FuncDesc) + return; + + if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) { + MismatchedSamples += FS.getTotalSamples(); + return; } + for (const auto &I : FS.getCallsiteSamples()) + for (const auto &CS : I.second) + computeHashMismatchSamples(CS.second, MismatchedSamples); +} + +void SampleProfileMatcher::runOnFunction(const Function &F) { + FunctionSamples *FSForReporting = Reader.getSamplesFor(F); std::unordered_set MatchedCallsiteLocs; // The value of the map is the name of direct callsite and use empty StringRef // for non-direct-call site. std::map IRLocations; // Extract profile matching anchors and profile mismatch metrics in the IR. - populateIRLocations(F, FS, MatchedCallsiteLocs, IRLocations); + populateIRLocations(F, FSForReporting, MatchedCallsiteLocs, IRLocations); // Detect profile mismatch for profile staleness metrics report. - if (ReportProfileStaleness || PersistProfileStaleness) { + // Skip to report the metrics for the imported functions. + if (FSForReporting && + !GlobalValue::isAvailableExternallyLinkage(F.getLinkage()) && + (ReportProfileStaleness || PersistProfileStaleness)) { + bool IsFuncHashMismatch = false; + if (FunctionSamples::ProfileIsProbeBased) { + TotalFuncHashSamples += FSForReporting->getTotalSamples(); + ; + TotalProfiledFunc++; + const auto *FuncDesc = ProbeManager->getDesc(F); + if (FuncDesc && + ProbeManager->profileIsHashMismatched(*FuncDesc, *FSForReporting)) { + computeHashMismatchSamples(*FSForReporting, MismatchedFuncHashSamples); + NumMismatchedFuncHash++; + IsFuncHashMismatch = true; + } + } + uint64_t FuncMismatchedCallsites = 0; uint64_t FuncProfiledCallsites = 0; - countProfileMismatches(FS, MatchedCallsiteLocs, FuncMismatchedCallsites, - FuncProfiledCallsites); + countProfileMismatches(*FSForReporting, MatchedCallsiteLocs, + FuncMismatchedCallsites, FuncProfiledCallsites); TotalProfiledCallsites += FuncProfiledCallsites; NumMismatchedCallsites += FuncMismatchedCallsites; LLVM_DEBUG({ @@ -2396,12 +2413,22 @@ }); } - if (IsFuncHashMismatch && SalvageStaleProfile) { + // Run profile matching, currently only support for pseudo-probe. + if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased) { + // Note that if a callsite isn't hit by any samples, it won't show up in the + // profile. We use flattened function samples so that callsites from + // different contexts are merged together for matching. + FunctionSamples *FSForMatching = getFlattenedSamplesFor(F); + assert((!FSForReporting || (FSForReporting && FSForMatching)) && + "Flattened profile should not be null if nested profile exists"); + // Run profile matching for checksum mismatched profile. + if (!FSForMatching || ProbeManager->profileIsValid(F, *FSForMatching)) + return; LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName() << "\n"); StringMap> CalleeToCallsitesMap; - populateProfileCallsites(FS, CalleeToCallsitesMap); + populateProfileCallsites(*FSForMatching, CalleeToCallsitesMap); // The matching result will be saved to IRToProfileLocationMap, create a new // map for each function. @@ -2413,17 +2440,13 @@ } void SampleProfileMatcher::runOnModule() { + if (SalvageStaleProfile) + ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles, + FunctionSamples::ProfileIsCS); for (auto &F : M) { if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile")) continue; - FunctionSamples *FS = nullptr; - if (FlattenProfileForMatching) - FS = getFlattenedSamplesFor(F); - else - FS = Reader.getSamplesFor(F); - if (!FS) - continue; - runOnFunction(F, *FS); + runOnFunction(F); } if (SalvageStaleProfile) distributeIRToProfileLocationMap(); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile-mismatch-nested.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile-mismatch-nested.prof new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile-mismatch-nested.prof @@ -0,0 +1,11 @@ +main:30:0 + 1: 0 + 12: 10 matched:10 + 20: 10 bar:10 + 13: bar:10 + 1: 10 + !CFGChecksum: 42949672950 + !CFGChecksum: 844635331715433 +bar:11:11 + 1: 11 + !CFGChecksum: 42949672950 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching-lto.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching-lto.prof --- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching-lto.prof +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching-lto.prof @@ -23,4 +23,4 @@ !CFGChecksum: 1125988587804525 bar:2401:2401 1: 2401 - !CFGChecksum: 4294967295 + !CFGChecksum: 123 diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll deleted file mode 100644 --- a/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll +++ /dev/null @@ -1,13 +0,0 @@ -; REQUIRES: x86_64-linux -; RUN: opt < %S/profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=1 -S 2>%t -o %t.ll -; RUN: FileCheck %s --input-file %t -; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD - -; RUN: opt < %S/profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch-cs.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=1 -S 2>%t -o %t.ll -; RUN: FileCheck %s --input-file %t -; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD - - -; CHECK: (3/4) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch. - -; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 3, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 20, !"TotalCallsiteSamples", i64 30} diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll --- a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll +++ b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll @@ -1,5 +1,5 @@ ; REQUIRES: x86_64-linux -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=0 -S 2>%t -o %t.ll +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -S 2>%t -o %t.ll ; RUN: FileCheck %s --input-file %t ; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD ; RUN: llc < %t.ll -filetype=obj -o %t.obj diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll @@ -0,0 +1,10 @@ +; REQUIRES: x86_64-linux +; RUN: opt < %S/pseudo-probe-stale-profile-matching-lto.ll -passes='thinlto' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-matching-lto.prof -report-profile-staleness -persist-profile-staleness -S 2>%t -o %t.ll +; RUN: FileCheck %s --input-file %t +; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD + +; CHECK: (1/1) of functions' profile are invalid and (6822/6822) of samples are discarded due to function hash mismatch. +; CHECK: (4/4) of callsites' profile are invalid and (2470/2470) of samples are discarded due to callsite location mismatch. + + +; CHECK-MD: ![[#]] = !{!"NumMismatchedFuncHash", i64 1, !"TotalProfiledFunc", i64 1, !"MismatchedFuncHashSamples", i64 6822, !"TotalFuncHashSamples", i64 6822, !"NumMismatchedCallsites", i64 4, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 2470, !"TotalCallsiteSamples", i64 2470} diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll @@ -6,6 +6,8 @@ ; RUN: llvm-objdump --section-headers %t.obj | FileCheck %s --check-prefix=CHECK-OBJ ; RUN: llc < %t.ll -filetype=asm -o - | FileCheck %s --check-prefix=CHECK-ASM +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-profile-mismatch-nested.prof -report-profile-staleness -persist-profile-staleness -S 2>&1 | FileCheck %s --check-prefix=CHECK-NESTED + ; CHECK: (1/3) of functions' profile are invalid and (10/50) of samples are discarded due to function hash mismatch. ; CHECK: (2/3) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch. @@ -47,6 +49,8 @@ ; CHECK-ASM: .byte 4 ; CHECK-ASM: .ascii "MzA=" +; CHECK-NESTED: (1/2) of functions' profile are invalid and (11/41) of samples are discarded due to function hash mismatch. + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu"