diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -36,7 +36,6 @@ // A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined // callees. Each record contains: // INLINE SITE -// GUID of the inlinee (uint64) // ID of the callsite probe (ULEB128) // FUNCTION BODY // A FUNCTION BODY entry describing the inlined function. diff --git a/llvm/test/tools/llvm-profgen/Inputs/inline-cs-dangling-pseudoprobe.perfscript b/llvm/test/tools/llvm-profgen/Inputs/inline-cs-dangling-pseudoprobe.perfscript new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/inline-cs-dangling-pseudoprobe.perfscript @@ -0,0 +1,5 @@ +PERF_RECORD_MMAP2 595196/595196: [0x201000(0x1000) @ 0 00:1d 224227621 1042948]: r-xp /home/inline-cs-pseudoprobe.perfbin + + 20180e + 5541f689495641d7 + 0x201858/0x20180e/P/-/-/0 0x20182b/0x20184d/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 0x20182b/0x201800/P/-/-/0 diff --git a/llvm/test/tools/llvm-profgen/inline-cs-dangling-pseudoprobe.test b/llvm/test/tools/llvm-profgen/inline-cs-dangling-pseudoprobe.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/inline-cs-dangling-pseudoprobe.test @@ -0,0 +1,51 @@ +; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-dangling-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER +; RUN: FileCheck %s --input-file %t + +; CHECK: [main:2 @ foo]:58:0 +; CHECK-NEXT: 2: 15 +; CHECK-NEXT: 3: 14 +; CHECK-NEXT: 5: 14 +; CHECK-NEXT: 6: 15 +; CHECK-NEXT: !CFGChecksum: 138950591924 +; CHECK:[main:2 @ foo:8 @ bar]:1:0 +; CHECK-NEXT: 2: 18446744073709551615 +; CHECK-NEXT: 3: 18446744073709551615 +; CHECK-NEXT: 4: 1 +; CHECK-NEXT: !CFGChecksum: 72617220756 + + +; CHECK-UNWINDER: Binary(inline-cs-pseudoprobe.perfbin)'s Range Counter: +; CHECK-UNWINDER-EMPTY: +; CHECK-UNWINDER-NEXT: (800, 82b): 14 +; CHECK-UNWINDER-NEXT: (84d, 858): 1 + +; CHECK-UNWINDER: Binary(inline-cs-pseudoprobe.perfbin)'s Branch Counter: +; CHECK-UNWINDER-EMPTY: +; CHECK-UNWINDER-NEXT: (82b, 800): 14 +; CHECK-UNWINDER-NEXT: (82b, 84d): 1 +; CHECK-UNWINDER-NEXT: (858, 80e): 1 + +; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling +; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls +; -g test.c -o a.out + +#include + +int bar(int x, int y) { + if (x % 3) { + return x - y; + } + return x + y; +} + +void foo() { + int s, i = 0; + while (i++ < 4000 * 4000) + if (i % 91) s = bar(i, s); else s += 30; + printf("sum is %d\n", s); +} + +int main() { + foo(); + return 0; +} diff --git a/llvm/test/tools/llvm-profgen/merge-cold-profile.test b/llvm/test/tools/llvm-profgen/merge-cold-profile.test --- a/llvm/test/tools/llvm-profgen/merge-cold-profile.test +++ b/llvm/test/tools/llvm-profgen/merge-cold-profile.test @@ -1,17 +1,18 @@ ; Used the data from recursion-compression.test, refer it for the unmerged output -; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=8 -; RUN: FileCheck %s --input-file %t +; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t1 --compress-recursion=-1 --csprof-cold-thres=8 +; RUN: FileCheck %s --input-file %t1 ; Test --csprof-trim-cold-context=0 -; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=100 --csprof-trim-cold-context=0 -; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-KEEP-COLD +; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --csprof-cold-thres=100 --csprof-trim-cold-context=0 +; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-KEEP-COLD ; Test --csprof-merge-cold-context=0 -; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=10 --csprof-merge-cold-context=0 -; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNMERGED +; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t3 --compress-recursion=-1 --csprof-cold-thres=10 --csprof-merge-cold-context=0 +; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-UNMERGED ; CHECK: [fa]:14:4 ; CHECK-NEXT: 1: 4 +; CHECK-NEXT: 2: 18446744073709551615 ; CHECK-NEXT: 3: 4 ; CHECK-NEXT: 4: 2 ; CHECK-NEXT: 5: 1 @@ -37,6 +38,7 @@ ; CHECK-KEEP-COLD-NEXT: !Attributes: 0 ; CHECK-KEEP-COLD-NEXT:[fa]:14:4 ; CHECK-KEEP-COLD-NEXT: 1: 4 +; CHECK-KEEP-COLD-NEXT: 2: 18446744073709551615 ; CHECK-KEEP-COLD-NEXT: 3: 4 ; CHECK-KEEP-COLD-NEXT: 4: 2 ; CHECK-KEEP-COLD-NEXT: 5: 1 diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -510,22 +510,19 @@ // Extract the top frame probes by looking up each address among the range in // the Address2ProbeMap extractProbesFromRange(RangeCounter, ProbeCounter, Binary); + std::unordered_map FrameSamples; for (auto PI : ProbeCounter) { const PseudoProbe *Probe = PI.first; uint64_t Count = PI.second; + // Ignore dangling probes since they will be reported later if needed. + if (Probe->isDangling()) + continue; FunctionSamples &FunctionProfile = getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary); - - // Use InvalidProbeCount(UINT64_MAX) to mark sample count for a dangling - // probe. Dangling probes are the probes associated to an empty block. With - // this place holder, sample count on dangling probe will not be trusted by - // the compiler and it will rely on the counts inference algorithm to get - // the probe a reasonable count. - if (Probe->isDangling()) { - FunctionProfile.addBodySamplesForProbe( - Probe->Index, FunctionSamples::InvalidProbeCount); - continue; - } + // Record the current frame and FunctionProfile whenever samples are + // collected for non-danglie probes. This is for reporting all of the + // dangling probes of the frame later. + FrameSamples[Probe->getInlineTreeNode()] = &FunctionProfile; FunctionProfile.addBodySamplesForProbe(Probe->Index, Count); FunctionProfile.addTotalSamples(Count); if (Probe->isEntry()) { @@ -554,6 +551,22 @@ FunctionProfile.getContext().getNameWithoutContext(), Count); } } + + // Report dangling probes for frames that have real samples collected. + // Dangling probes are the probes associated to an empty block. With this + // place holder, sample count on a dangling probe will not be trusted by the + // compiler and we will rely on the counts inference algorithm to get the + // probe a reasonable count. Use InvalidProbeCount to mark sample count for + // a dangling probe. + for (auto &I : FrameSamples) { + auto *FunctionProfile = I.second; + for (auto *Probe : I.first->getProbes()) { + if (Probe->isDangling()) { + FunctionProfile->addBodySamplesForProbe( + Probe->Index, FunctionSamples::InvalidProbeCount); + } + } + } } } diff --git a/llvm/tools/llvm-profgen/PseudoProbe.h b/llvm/tools/llvm-profgen/PseudoProbe.h --- a/llvm/tools/llvm-profgen/PseudoProbe.h +++ b/llvm/tools/llvm-profgen/PseudoProbe.h @@ -45,9 +45,10 @@ return std::get<0>(Site) ^ std::get<1>(Site); } }; - std::unordered_map, - InlineSiteHash> - Children; + using InlinedProbeTreeMap = + std::unordered_map, + InlineSiteHash>; + InlinedProbeTreeMap Children; public: // Inlinee function GUID @@ -71,6 +72,8 @@ return Ret.first->second.get(); } + InlinedProbeTreeMap &getChildren() { return Children; } + std::vector &getProbes() { return ProbeVector; } void addProbes(PseudoProbe *Probe) { ProbeVector.push_back(Probe); } // Return false if it's a dummy inline site bool hasInlineSite() const { return std::get<0>(ISite) != 0; } @@ -91,7 +94,7 @@ // GUID to PseudoProbeFuncDesc map using GUIDProbeFunctionMap = std::unordered_map; // Address to pseudo probes map. -using AddressProbesMap = std::unordered_map>; +using AddressProbesMap = std::unordered_map>; /* A pseudo probe has the format like below: @@ -135,6 +138,8 @@ bool isDirectCall() const { return Type == PseudoProbeType::DirectCall; } bool isCall() const { return isIndirectCall() || isDirectCall(); } + PseudoProbeInlineTree *getInlineTreeNode() const { return InlineTree; } + // Get the inlined context by traversing current inline tree backwards, // each tree node has its InlineSite which is taken as the context. // \p ContextStack is populated in root to leaf order diff --git a/llvm/tools/llvm-profgen/PseudoProbe.cpp b/llvm/tools/llvm-profgen/PseudoProbe.cpp --- a/llvm/tools/llvm-profgen/PseudoProbe.cpp +++ b/llvm/tools/llvm-profgen/PseudoProbe.cpp @@ -198,7 +198,6 @@ // A list of NUM_INLINED_FUNCTIONS entries describing each of the // inlined callees. Each record contains: // INLINE SITE - // GUID of the inlinee (uint64) // Index of the callsite probe (ULEB128) // FUNCTION BODY // A FUNCTION BODY entry describing the inlined function. @@ -214,8 +213,11 @@ uint32_t Index = 0; // A DFS-based decoding while (Data < End) { - // Read inline site for inlinees - if (Root != Cur) { + if (Root == Cur) { + // Use a sequential id for top level inliner. + Index = Root->getChildren().size(); + } else { + // Read inline site for inlinees Index = readUnsignedNumber(); } // Switch/add to a new tree node(inlinee) @@ -243,10 +245,10 @@ Addr = readUnencodedNumber(); } // Populate Address2ProbesMap - std::vector &ProbeVec = Address2ProbesMap[Addr]; - ProbeVec.emplace_back(Addr, Cur->GUID, Index, PseudoProbeType(Kind), Attr, - Cur); - Cur->addProbes(&ProbeVec.back()); + auto &Probes = Address2ProbesMap[Addr]; + Probes.emplace_back(Addr, Cur->GUID, Index, PseudoProbeType(Kind), Attr, + Cur); + Cur->addProbes(&Probes.back()); LastAddr = Addr; } @@ -298,7 +300,7 @@ auto It = Address2ProbesMap.find(Address); if (It == Address2ProbesMap.end()) return nullptr; - const std::vector &Probes = It->second; + const auto &Probes = It->second; const PseudoProbe *CallProbe = nullptr; for (const auto &Probe : Probes) {