diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -926,12 +926,16 @@ return CallsiteSamples; } - /// Return the maximum of sample counts in a function body including functions - /// inlined in it. - uint64_t getMaxCountInside() const { + /// Return the maximum of sample counts in a function body. When SkipCallSite + /// is false, which is the default, the return count includes samples in the + /// inlined functions. When SkipCallSite is true, the return count only + /// considers the body samples. + uint64_t getMaxCountInside(bool SkipCallSite = false) const { uint64_t MaxCount = 0; for (const auto &L : getBodySamples()) MaxCount = std::max(MaxCount, L.second.getSamples()); + if (SkipCallSite) + return MaxCount; for (const auto &C : getCallsiteSamples()) for (const FunctionSamplesMap::value_type &F : C.second) MaxCount = std::max(MaxCount, F.second.getMaxCountInside()); diff --git a/llvm/test/tools/llvm-profdata/Inputs/flatten_instr.proftext b/llvm/test/tools/llvm-profdata/Inputs/flatten_instr.proftext new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/flatten_instr.proftext @@ -0,0 +1,32 @@ +# IR level Instrumentation Flag +:ir +# Always instrument the function entry block +:entry_first +foo +# Func Hash: +1111 +# Num Counters: +5 +# Counter Values: +10000 +50 +2000 +40 +6000 + +bar.cc:bar +# Func Hash: +2222 +# Num Counters: +10 +# Counter Values: +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/llvm/test/tools/llvm-profdata/Inputs/flatten_sample.proftext b/llvm/test/tools/llvm-profdata/Inputs/flatten_sample.proftext new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/flatten_sample.proftext @@ -0,0 +1,12 @@ +foo:12345:1000 + 1: 1000 + 2.1: 1000 + 15: 5000 + 4: bar:1000 + 1: 1000 + 2: goo:3000 + 1: 3000 + 8: bar:40000 + 1: 10000 + 2: goo:30000 + 1: 30000 diff --git 
a/llvm/test/tools/llvm-profdata/suppl-instr-with-sample-flatten.test b/llvm/test/tools/llvm-profdata/suppl-instr-with-sample-flatten.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/suppl-instr-with-sample-flatten.test @@ -0,0 +1,17 @@ +Some basic tests for supplementing instrumentation profile with sample profile +with flattening. + +Test bar.cc:bar's counters will be set to PseudoHot. +RUN: llvm-profdata merge \ +RUN: -supplement-instr-with-sample=%p/Inputs/flatten_sample.proftext \ +RUN: %p/Inputs/flatten_instr.proftext -o %t +RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FLATTEN + +FLATTEN: bar.cc:bar: +FLATTEN-NEXT: Hash: 0x00000000000008ae +FLATTEN-NEXT: Counters: 10 +FLATTEN: foo: +FLATTEN-NEXT: Hash: 0x0000000000000457 +FLATTEN-NEXT: Counters: 5 +FLATTEN-NEXT: Block counts: [10000, 50, 2000, 40, 6000] +FLATTEN-NOT: goo: diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -632,6 +632,100 @@ } }; + // We need to flatten the SampleFDO profile as the InstrFDO + // profile does not have inlined callsite profiles. + // One caveat is pre-inlined functions -- their samples + // should be collapsed into the caller function. + // Here we do a DFS traversal to get the flattened profile + // info: the sum of entry counts and the max of max counts. + // Here is the algorithm: + // recursive (FS, root_name) { + // name = FS->getName(); + // get samples for FS; + // if (InstrProf.find(name)) { + // root_name = name; + // } else { + // if (name is in static_func map) { + // root_name = static_name; + // } + // } + // update the Map entry for root_name; + // for (subfs: FS) { + // recursive(subfs, root_name); + // } + // } + // + // Here is an example.
+ // + // SampleProfile: + // foo:12345:1000 + // 1: 1000 + // 2.1: 1000 + // 15: 5000 + // 4: bar:1000 + // 1: 1000 + // 2: goo:3000 + // 1: 3000 + // 8: bar:40000 + // 1: 10000 + // 2: goo:30000 + // 1: 30000 + // + // InstrProfile has two entries: + // foo + // bar.cc:bar + // + // After BuildMaxSampleMap, we should have the following in FlattenSampleMap: + // {"foo", {1000, 5000}} + // {"bar.cc:bar", {11000, 30000}} + // + // foo has an entry count of 1000 and a max body count of 5000. + // bar.cc:bar has an entry count of 11000 (sum of two callsites: 1000 and + // 10000), and a max count of 30000 (from the callsite at line 8). + // + // Note that goo's count will remain in bar.cc:bar() as it does not have an + // entry in InstrProfile. + DenseMap<StringRef, std::pair<uint64_t, uint64_t>> FlattenSampleMap; + auto BuildMaxSampleMap = [&FlattenSampleMap, &StaticFuncMap, + &InstrProfileMap](const FunctionSamples &FS, + const StringRef &RootName) { + auto BuildMaxSampleMapImpl = [&](const FunctionSamples &FS, + const StringRef &RootName, + auto &BuildImpl) -> void { + const StringRef &Name = FS.getName(); + const StringRef *NewRootName = &RootName; + uint64_t EntrySample = FS.getHeadSamplesEstimate(); + uint64_t MaxBodySample = FS.getMaxCountInside(/* SkipCallSite*/ true); + + auto It = InstrProfileMap.find(Name); + if (It != InstrProfileMap.end()) { + NewRootName = &Name; + } else { + auto NewName = StaticFuncMap.find(Name); + if (NewName != StaticFuncMap.end()) { + It = InstrProfileMap.find(NewName->second.str()); + if (NewName->second != DuplicateNameStr) { + NewRootName = &NewName->second; + } + } else { + // Here the EntrySample is of an inlined function, so we should not + // update the EntrySample in the map.
+ EntrySample = 0; + } + } + EntrySample += FlattenSampleMap[*NewRootName].first; + MaxBodySample = + std::max(FlattenSampleMap[*NewRootName].second, MaxBodySample); + FlattenSampleMap[*NewRootName] = + std::make_pair(EntrySample, MaxBodySample); + + for (const auto &C : FS.getCallsiteSamples()) + for (const auto &F : C.second) + BuildImpl(F.second, *NewRootName, BuildImpl); + }; + BuildMaxSampleMapImpl(FS, RootName, BuildMaxSampleMapImpl); + }; + for (auto &PD : WC->Writer.getProfileData()) { // Populate IPBuilder. for (const auto &PDV : PD.getValue()) { @@ -650,6 +744,11 @@ buildStaticFuncMap(FullName); } + for (auto &PD : Reader->getProfiles()) { + sampleprof::FunctionSamples &FS = PD.second; + BuildMaxSampleMap(FS, FS.getName()); + } + ProfileSummary InstrPS = *IPBuilder.getSummary(); ProfileSummary SamplePS = Reader->getSummary(); @@ -679,20 +778,19 @@ // Find hot/warm functions in sample profile which is cold in instr profile // and adjust the profiles of those functions in the instr profile. - for (const auto &PD : Reader->getProfiles()) { - const sampleprof::FunctionSamples &FS = PD.second; - uint64_t SampleMaxCount = FS.getMaxCountInside(); + for (const auto &E : FlattenSampleMap) { + uint64_t SampleMaxCount = std::max(E.second.first, E.second.second); if (SampleMaxCount < ColdSampleThreshold) continue; - auto &FContext = PD.first; - auto It = InstrProfileMap.find(FContext.toString()); + const StringRef &Name = E.first; + auto It = InstrProfileMap.find(Name); if (It == InstrProfileMap.end()) { - auto NewName = StaticFuncMap.find(FContext.toString()); + auto NewName = StaticFuncMap.find(Name); if (NewName != StaticFuncMap.end()) { It = InstrProfileMap.find(NewName->second.str()); if (NewName->second == DuplicateNameStr) { WithColor::warning() - << "Static function " << FContext.toString() + << "Static function " << Name << " has multiple promoted names, cannot adjust profile.\n"; } }