diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h --- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -102,12 +102,19 @@ } } - const PseudoProbeDescriptor *getDesc(const Function &F) const { - auto I = GUIDToProbeDescMap.find( - Function::getGUID(FunctionSamples::getCanonicalFnName(F))); + const PseudoProbeDescriptor *getDesc(uint64_t GUID) const { + auto I = GUIDToProbeDescMap.find(GUID); return I == GUIDToProbeDescMap.end() ? nullptr : &I->second; } + const PseudoProbeDescriptor *getDesc(StringRef FProfileName) const { + return getDesc(Function::getGUID(FProfileName)); + } + + const PseudoProbeDescriptor *getDesc(const Function &F) const { + return getDesc(Function::getGUID(FunctionSamples::getCanonicalFnName(F))); + } + bool moduleIsProbed(const Module &M) const { return M.getNamedMetadata(PseudoProbeDescMetadataName); } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -142,11 +142,6 @@ cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section).")); -static cl::opt FlattenProfileForMatching( - "flatten-profile-for-matching", cl::Hidden, cl::init(true), - cl::desc( - "Use flattened profile for stale profile detection and matching.")); - static cl::opt ProfileSampleAccurate( "profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " @@ -466,12 +461,7 @@ public: SampleProfileMatcher(Module &M, SampleProfileReader &Reader, const PseudoProbeManager *ProbeManager) - : M(M), Reader(Reader), ProbeManager(ProbeManager) { - if (FlattenProfileForMatching) { - ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles, - FunctionSamples::ProfileIsCS); - } - } + : M(M), Reader(Reader), ProbeManager(ProbeManager){}; void runOnModule(); private: @@ -482,8 +472,14 @@ return &It->second; return nullptr; } - void runOnFunction(const Function &F, const FunctionSamples &FS); + void runOnFunction(const Function &F); + void computeHashMismatchSamples(const FunctionSamples &FS, + uint64_t &MismatchedSamples); void countProfileMismatches( + const Function &F, const FunctionSamples &FS, + const std::map> &ProfileLocations, + const std::map &IRLocations); + void countProfileCallsiteMismatches( const FunctionSamples &FS, const std::map> &ProfileLocations, const std::map &IRLocations, @@ -2180,11 +2176,44 @@ } void SampleProfileMatcher::countProfileMismatches( + const Function &F, const FunctionSamples &FS, + const std::map> &ProfileLocations, + const std::map &IRLocations) { + bool IsFuncHashMismatch = false; + if (FunctionSamples::ProfileIsProbeBased) { + TotalFuncHashSamples += FS.getTotalSamples(); + TotalProfiledFunc++; + const auto *FuncDesc = ProbeManager->getDesc(F); + if (FuncDesc) { + if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) { + NumMismatchedFuncHash++; + IsFuncHashMismatch = true; + } + computeHashMismatchSamples(FS, MismatchedFuncHashSamples); + } + } + + uint64_t FuncMismatchedCallsites = 0; + uint64_t FuncProfiledCallsites = 0; + countProfileCallsiteMismatches(FS, ProfileLocations, IRLocations, + FuncMismatchedCallsites, + FuncProfiledCallsites); + TotalProfiledCallsites += FuncProfiledCallsites; + NumMismatchedCallsites += FuncMismatchedCallsites; + LLVM_DEBUG({ + if (FunctionSamples::ProfileIsProbeBased && !IsFuncHashMismatch && + FuncMismatchedCallsites) + dbgs() << "Function checksum is matched but there are " + << FuncMismatchedCallsites << "/" << FuncProfiledCallsites + << " mismatched callsites.\n"; + }); +} + +void SampleProfileMatcher::countProfileCallsiteMismatches( const FunctionSamples &FS, const std::map> &ProfileLocations, const std::map &IRLocations, uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites) { - // Check if there are any callsites in the profile that does not match to any // IR callsites, those callsite samples will be discarded. for (const auto &I : ProfileLocations) { @@ -2358,49 +2387,54 @@ } } -void SampleProfileMatcher::runOnFunction(const Function &F, - const FunctionSamples &FS) { - bool IsFuncHashMismatch = false; - if (FunctionSamples::ProfileIsProbeBased) { - uint64_t Count = FS.getTotalSamples(); - TotalFuncHashSamples += Count; - TotalProfiledFunc++; - const auto *FuncDesc = ProbeManager->getDesc(F); - if (FuncDesc && ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) { - MismatchedFuncHashSamples += Count; - NumMismatchedFuncHash++; - IsFuncHashMismatch = true; - } +void SampleProfileMatcher::computeHashMismatchSamples( + const FunctionSamples &FS, uint64_t &MismatchedSamples) { + const auto *FuncDesc = ProbeManager->getDesc(FS.getName()); + // Skip the function that is external or renamed. + if (!FuncDesc) + return; + + if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) { + MismatchedSamples += FS.getTotalSamples(); + return; } + for (const auto &I : FS.getCallsiteSamples()) + for (const auto &CS : I.second) + computeHashMismatchSamples(CS.second, MismatchedSamples); +} + +void SampleProfileMatcher::runOnFunction(const Function &F) { + const auto *FS = Reader.getSamplesFor(F); + // Use flattened function samples to populate profile locations as function + // samples under different context may have different callsites, so merge them + // together for the matching. + const auto *FSFlattened = getFlattenedSamplesFor(F); + if (!FSFlattened) + return; // Anchors for IR. It's a map from IR location to the name of direct call // target. Also use empty StringRef for non-call instruction and use a dummy // name:IndirectCalleeName("_indirect_call_") for indirect callsite. std::map IRLocations; populateIRLocations(F, IRLocations); + // Anchors for profile. it's a map from callsite location to set of callee // name. std::map> ProfileLocations; - populateProfileLocations(FS, ProfileLocations); + populateProfileLocations(*FSFlattened, ProfileLocations); // Detect profile mismatch for profile staleness metrics report. - if (ReportProfileStaleness || PersistProfileStaleness) { - uint64_t FuncMismatchedCallsites = 0; - uint64_t FuncProfiledCallsites = 0; - countProfileMismatches(FS, ProfileLocations, IRLocations, - FuncMismatchedCallsites, FuncProfiledCallsites); - TotalProfiledCallsites += FuncProfiledCallsites; - NumMismatchedCallsites += FuncMismatchedCallsites; - LLVM_DEBUG({ - if (FunctionSamples::ProfileIsProbeBased && !IsFuncHashMismatch && - FuncMismatchedCallsites) - dbgs() << "Function checksum is matched but there are " - << FuncMismatchedCallsites << "/" << FuncProfiledCallsites - << " mismatched callsites.\n"; - }); - } - - if (IsFuncHashMismatch && SalvageStaleProfile) { + // Skip reporting the metrics for imported functions. + if (FS && !GlobalValue::isAvailableExternallyLinkage(F.getLinkage()) && + (ReportProfileStaleness || PersistProfileStaleness)) { + countProfileMismatches(F, *FS, ProfileLocations, IRLocations); + } + + // Run profile matching, currently only support for pseudo-probe. + if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased) { + // Run profile matching for checksum mismatched profile. + if (ProbeManager->profileIsValid(F, *FSFlattened)) + return; LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName() << "\n"); @@ -2417,17 +2451,12 @@ } void SampleProfileMatcher::runOnModule() { + ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles, + FunctionSamples::ProfileIsCS); for (auto &F : M) { if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile")) continue; - FunctionSamples *FS = nullptr; - if (FlattenProfileForMatching) - FS = getFlattenedSamplesFor(F); - else - FS = Reader.getSamplesFor(F); - if (!FS) - continue; - runOnFunction(F, *FS); + runOnFunction(F); } if (SalvageStaleProfile) distributeIRToProfileLocationMap(); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile-mismatch-nested.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile-mismatch-nested.prof new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile-mismatch-nested.prof @@ -0,0 +1,11 @@ +main:30:0 + 1: 0 + 12: 10 matched:10 + 20: 10 bar:10 + 13: bar:10 + 1: 10 + !CFGChecksum: 42949672950 + !CFGChecksum: 844635331715433 +bar:11:11 + 1: 11 + !CFGChecksum: 42949672950 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching-lto.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching-lto.prof --- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching-lto.prof +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching-lto.prof @@ -23,4 +23,4 @@ !CFGChecksum: 1125988587804525 bar:2401:2401 1: 2401 - !CFGChecksum: 4294967295 + !CFGChecksum: 123 diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll deleted file mode 100644 --- a/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll +++ /dev/null @@ -1,13 +0,0 @@ -; REQUIRES: x86_64-linux -; RUN: opt < %S/profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=1 -S 2>%t -o %t.ll -; RUN: FileCheck %s --input-file %t -; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD - -; RUN: opt < %S/profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch-cs.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=1 -S 2>%t -o %t.ll -; RUN: FileCheck %s --input-file %t -; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD - - -; CHECK: (3/4) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch. - -; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 3, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 20, !"TotalCallsiteSamples", i64 30} diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll --- a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll +++ b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll @@ -1,5 +1,5 @@ ; REQUIRES: x86_64-linux -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=0 -S 2>%t -o %t.ll +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -S 2>%t -o %t.ll ; RUN: FileCheck %s --input-file %t ; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD ; RUN: llc < %t.ll -filetype=obj -o %t.obj diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll @@ -0,0 +1,10 @@ +; REQUIRES: x86_64-linux +; RUN: opt < %S/pseudo-probe-stale-profile-matching-lto.ll -passes='thinlto' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-matching-lto.prof -report-profile-staleness -persist-profile-staleness -S 2>%t -o %t.ll +; RUN: FileCheck %s --input-file %t +; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD + +; CHECK: (1/1) of functions' profile are invalid and (6822/6822) of samples are discarded due to function hash mismatch. +; CHECK: (4/4) of callsites' profile are invalid and (5026/5026) of samples are discarded due to callsite location mismatch. + + +; CHECK-MD: ![[#]] = !{!"NumMismatchedFuncHash", i64 1, !"TotalProfiledFunc", i64 1, !"MismatchedFuncHashSamples", i64 6822, !"TotalFuncHashSamples", i64 6822, !"NumMismatchedCallsites", i64 4, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 5026, !"TotalCallsiteSamples", i64 5026} diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll @@ -6,6 +6,8 @@ ; RUN: llvm-objdump --section-headers %t.obj | FileCheck %s --check-prefix=CHECK-OBJ ; RUN: llc < %t.ll -filetype=asm -o - | FileCheck %s --check-prefix=CHECK-ASM +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-profile-mismatch-nested.prof -report-profile-staleness -persist-profile-staleness -S 2>&1 | FileCheck %s --check-prefix=CHECK-NESTED + ; CHECK: (1/3) of functions' profile are invalid and (10/50) of samples are discarded due to function hash mismatch. ; CHECK: (2/3) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch. @@ -47,6 +49,8 @@ ; CHECK-ASM: .byte 4 ; CHECK-ASM: .ascii "MzA=" +; CHECK-NESTED: (1/2) of functions' profile are invalid and (21/41) of samples are discarded due to function hash mismatch. + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu"