diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
@@ -102,12 +102,19 @@
     }
   }
 
-  const PseudoProbeDescriptor *getDesc(const Function &F) const {
-    auto I = GUIDToProbeDescMap.find(
-        Function::getGUID(FunctionSamples::getCanonicalFnName(F)));
+  const PseudoProbeDescriptor *getDesc(uint64_t GUID) const {
+    auto I = GUIDToProbeDescMap.find(GUID);
     return I == GUIDToProbeDescMap.end() ? nullptr : &I->second;
   }
 
+  const PseudoProbeDescriptor *getDesc(StringRef FProfileName) const {
+    return getDesc(Function::getGUID(FProfileName));
+  }
+
+  const PseudoProbeDescriptor *getDesc(const Function &F) const {
+    return getDesc(Function::getGUID(FunctionSamples::getCanonicalFnName(F)));
+  }
+
   bool moduleIsProbed(const Module &M) const {
     return M.getNamedMetadata(PseudoProbeDescMetadataName);
   }
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -477,7 +477,10 @@
       return &It->second;
     return nullptr;
   }
-  void runOnFunction(const Function &F, const FunctionSamples &FS);
+  void runOnFunction(const Function &F, const FunctionSamples &FSForReporting,
+                     const FunctionSamples &FSForMatching);
+  void computeHashMismatchSamples(const FunctionSamples &FS,
+                                  uint64_t &MismatchedSamples);
   void countProfileMismatches(
       const FunctionSamples &FS,
       const std::unordered_set<LineLocation, LineLocationHash>
@@ -2355,18 +2358,46 @@
   }
 }
 
+void SampleProfileMatcher::computeHashMismatchSamples(
+    const FunctionSamples &FS, uint64_t &MismatchedSamples) {
+  const auto *FuncDesc = ProbeManager->getDesc(FS.getName());
+  // Skip the function that is external or renamed.
+  if (!FuncDesc)
+    return;
+
+  if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
+    MismatchedSamples += FS.getTotalSamples();
+    return;
+  }
+
+  for (const auto &I : FS.getCallsiteSamples())
+    for (const auto &CS : I.second)
+      computeHashMismatchSamples(CS.second, MismatchedSamples);
+}
+
 void SampleProfileMatcher::runOnFunction(const Function &F,
-                                         const FunctionSamples &FS) {
-  bool IsFuncHashMismatch = false;
+                                         const FunctionSamples &FSForReporting,
+                                         const FunctionSamples &FSForMatching) {
+  // Skip reporting the metrics for imported functions.
+  bool ShouldReportStaleness =
+      !GlobalValue::isAvailableExternallyLinkage(F.getLinkage()) &&
+      (ReportProfileStaleness || PersistProfileStaleness);
+  bool ShouldMatchStaleProfile = false;
+
+  // Local variables for staleness metrics.
+  uint64_t FuncTotalFuncHashSamples = FSForReporting.getTotalSamples();
+  uint64_t FuncMismatchedFuncHashSamples = 0;
+  uint64_t FuncNumMismatchedFuncHash = 0;
+  uint64_t FuncMismatchedCallsites = 0;
+  uint64_t FuncProfiledCallsites = 0;
+
   if (FunctionSamples::ProfileIsProbeBased) {
-    uint64_t Count = FS.getTotalSamples();
-    TotalFuncHashSamples += Count;
-    TotalProfiledFunc++;
     const auto *FuncDesc = ProbeManager->getDesc(F);
-    if (FuncDesc && ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
-      MismatchedFuncHashSamples += Count;
-      NumMismatchedFuncHash++;
-      IsFuncHashMismatch = true;
+    if (FuncDesc &&
+        ProbeManager->profileIsHashMismatched(*FuncDesc, FSForReporting)) {
+      computeHashMismatchSamples(FSForReporting, FuncMismatchedFuncHashSamples);
+      FuncNumMismatchedFuncHash++;
+      ShouldMatchStaleProfile = true;
     }
   }
 
@@ -2375,31 +2406,35 @@
   // for non-direct-call site.
   std::map<LineLocation, StringRef> IRLocations;
   // Extract profile matching anchors and profile mismatch metrics in the IR.
-  populateIRLocations(F, FS, MatchedCallsiteLocs, IRLocations);
+  populateIRLocations(F, FSForReporting, MatchedCallsiteLocs, IRLocations);
 
   // Detect profile mismatch for profile staleness metrics report.
-  if (ReportProfileStaleness || PersistProfileStaleness) {
-    uint64_t FuncMismatchedCallsites = 0;
-    uint64_t FuncProfiledCallsites = 0;
-    countProfileMismatches(FS, MatchedCallsiteLocs, FuncMismatchedCallsites,
-                           FuncProfiledCallsites);
-    TotalProfiledCallsites += FuncProfiledCallsites;
-    NumMismatchedCallsites += FuncMismatchedCallsites;
+  if (ShouldReportStaleness) {
+    countProfileMismatches(FSForReporting, MatchedCallsiteLocs,
+                           FuncMismatchedCallsites, FuncProfiledCallsites);
+
     LLVM_DEBUG({
-      if (FunctionSamples::ProfileIsProbeBased && !IsFuncHashMismatch &&
+      if (FunctionSamples::ProfileIsProbeBased && !ShouldMatchStaleProfile &&
           FuncMismatchedCallsites)
         dbgs() << "Function checksum is matched but there are "
                << FuncMismatchedCallsites << "/" << FuncProfiledCallsites
                << " mismatched callsites.\n";
     });
+
+    TotalFuncHashSamples += FuncTotalFuncHashSamples;
+    TotalProfiledFunc++;
+    MismatchedFuncHashSamples += FuncMismatchedFuncHashSamples;
+    NumMismatchedFuncHash += FuncNumMismatchedFuncHash;
+    TotalProfiledCallsites += FuncProfiledCallsites;
+    NumMismatchedCallsites += FuncMismatchedCallsites;
   }
 
-  if (IsFuncHashMismatch && SalvageStaleProfile) {
+  if (ShouldMatchStaleProfile && SalvageStaleProfile) {
     LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
                       << "\n");
 
     StringMap<std::set<LineLocation>> CalleeToCallsitesMap;
-    populateProfileCallsites(FS, CalleeToCallsitesMap);
+    populateProfileCallsites(FSForMatching, CalleeToCallsitesMap);
 
     // The matching result will be saved to IRToProfileLocationMap, create a new
     // map for each function.
@@ -2414,14 +2449,14 @@
   for (auto &F : M) {
     if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
       continue;
-    FunctionSamples *FS = nullptr;
+    FunctionSamples *FS = Reader.getSamplesFor(F);
+    FunctionSamples *FSForMatching = FS;
     if (FlattenProfileForMatching)
-      FS = getFlattenedSamplesFor(F);
-    else
-      FS = Reader.getSamplesFor(F);
-    if (!FS)
+      FSForMatching = getFlattenedSamplesFor(F);
+
+    if (!FS || !FSForMatching)
       continue;
-    runOnFunction(F, *FS);
+    runOnFunction(F, *FS, *FSForMatching);
   }
   if (SalvageStaleProfile)
     distributeIRToProfileLocationMap();
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile-mismatch-nested.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile-mismatch-nested.prof
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile-mismatch-nested.prof
@@ -0,0 +1,11 @@
+main:30:0
+ 1: 0
+ 12: 10 matched:10
+ 20: 10 bar:10
+ 13: bar:10
+  1: 10
+  !CFGChecksum: 42949672950
+ !CFGChecksum: 844635331715433
+bar:11:11
+ 1: 11
+ !CFGChecksum: 42949672950
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching-lto.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching-lto.prof
--- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching-lto.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching-lto.prof
@@ -23,4 +23,4 @@
  !CFGChecksum: 1125988587804525
 bar:2401:2401
  1: 2401
- !CFGChecksum: 4294967295
+ !CFGChecksum: 123
diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll
deleted file mode 100644
--- a/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; REQUIRES: x86_64-linux
-; RUN: opt < %S/profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=1 -S 2>%t -o %t.ll
-; RUN: FileCheck %s --input-file %t
-; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD
-
-; RUN: opt < %S/profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch-cs.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=1 -S 2>%t -o %t.ll
-; RUN: FileCheck %s --input-file %t
-; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD
-
-
-; CHECK: (3/4) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch.
-
-; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 3, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 20, !"TotalCallsiteSamples", i64 30}
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll
@@ -0,0 +1,10 @@
+; REQUIRES: x86_64-linux
+; RUN: opt < %S/pseudo-probe-stale-profile-matching-lto.ll -passes='thinlto' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-matching-lto.prof -report-profile-staleness -persist-profile-staleness -S 2>%t -o %t.ll
+; RUN: FileCheck %s --input-file %t
+; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD
+
+; CHECK: (1/1) of functions' profile are invalid and (6822/6822) of samples are discarded due to function hash mismatch.
+; CHECK: (4/4) of callsites' profile are invalid and (2470/2470) of samples are discarded due to callsite location mismatch.
+
+
+; CHECK-MD: ![[#]] = !{!"NumMismatchedFuncHash", i64 1, !"TotalProfiledFunc", i64 1, !"MismatchedFuncHashSamples", i64 6822, !"TotalFuncHashSamples", i64 6822, !"NumMismatchedCallsites", i64 4, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 2470, !"TotalCallsiteSamples", i64 2470}
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
@@ -6,6 +6,8 @@
 ; RUN: llvm-objdump --section-headers %t.obj | FileCheck %s --check-prefix=CHECK-OBJ
 ; RUN: llc < %t.ll -filetype=asm -o - | FileCheck %s --check-prefix=CHECK-ASM
 
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-profile-mismatch-nested.prof -report-profile-staleness -persist-profile-staleness -S 2>&1 | FileCheck %s --check-prefix=CHECK-NESTED
+
 ; CHECK: (1/3) of functions' profile are invalid and (10/50) of samples are discarded due to function hash mismatch.
 ; CHECK: (2/3) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch.
 
@@ -47,6 +49,8 @@
 ; CHECK-ASM: .byte 4
 ; CHECK-ASM: .ascii "MzA="
 
+; CHECK-NESTED: (1/2) of functions' profile are invalid and (11/41) of samples are discarded due to function hash mismatch.
+
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 