diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -55,6 +55,7 @@ #include #include #include +#include #include namespace llvm { @@ -353,6 +354,16 @@ // Decode pseudo_probe section to build address to probes map. bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size); + // Decode pseudo_probe section to build address to probes map for specified functions only. + bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size, + std::unordered_set &GuildFilter); + + // Decode one function body under Cur and, recursively, all of its inlinees. + // LastAddr is the previous probe address, used for delta-encoded addresses. + bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, + uint64_t &LastAddr, + std::unordered_set &GuildFilter); + // Print pseudo_probe_desc section info void printGUID2FuncDescMap(raw_ostream &OS); diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -358,8 +358,9 @@ return true; } -bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start, - std::size_t Size) { +bool MCPseudoProbeDecoder::buildAddress2ProbeMap( + MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr, + std::unordered_set &GuildFilter) { // The pseudo_probe section encodes an inline forest and each tree has a // format like: // FUNCTION BODY (one for each uninlined function present in the text @@ -390,101 +391,112 @@ // FUNCTION BODY // A FUNCTION BODY entry describing the inlined function. - Data = Start; - End = Data + Size; - - MCDecodedPseudoProbeInlineTree *Root = &DummyInlineRoot; - MCDecodedPseudoProbeInlineTree *Cur = &DummyInlineRoot; - uint64_t LastAddr = 0; uint32_t Index = 0; - // A DFS-based decoding - while (Data < End) { - if (Root == Cur) { - // Use a sequential id for top level inliner.
- Index = Root->getChildren().size(); - } else { - // Read inline site for inlinees - auto ErrorOrIndex = readUnsignedNumber(); - if (!ErrorOrIndex) - return false; - Index = std::move(*ErrorOrIndex); - } + if (Cur == &DummyInlineRoot) { + // Use a sequential id for top level inliner. + Index = Cur->getChildren().size(); + } else { + // Read inline site for inlinees + auto ErrorOrIndex = readUnsignedNumber(); + if (!ErrorOrIndex) + return false; + Index = std::move(*ErrorOrIndex); + } + + // Read guid + auto ErrorOrCurGuid = readUnencodedNumber(); + if (!ErrorOrCurGuid) + return false; + uint64_t Guid = std::move(*ErrorOrCurGuid); + + // Decide if top-level node should be discarded. + if (Cur == &DummyInlineRoot && !GuildFilter.empty() && !GuildFilter.count(Guid)) + Cur = nullptr; + + // If the incoming node is null, all its children nodes should be discarded. + if (Cur) { // Switch/add to a new tree node(inlinee) Cur = Cur->getOrAddNode(std::make_tuple(Cur->Guid, Index)); - // Read guid - auto ErrorOrCurGuid = readUnencodedNumber(); - if (!ErrorOrCurGuid) - return false; - Cur->Guid = std::move(*ErrorOrCurGuid); - // Read number of probes in the current node. - auto ErrorOrNodeCount = readUnsignedNumber(); - if (!ErrorOrNodeCount) + Cur->Guid = Guid; + } + + // Read number of probes in the current node.
+ auto ErrorOrNodeCount = readUnsignedNumber(); + if (!ErrorOrNodeCount) + return false; + uint32_t NodeCount = std::move(*ErrorOrNodeCount); + // Read number of direct inlinees + auto ErrorOrCurChildrenToProcess = readUnsignedNumber(); + if (!ErrorOrCurChildrenToProcess) + return false; + // Read all probes in this node + for (std::size_t I = 0; I < NodeCount; I++) { + // Read index + auto ErrorOrIndex = readUnsignedNumber(); + if (!ErrorOrIndex) return false; - uint32_t NodeCount = std::move(*ErrorOrNodeCount); - // Read number of direct inlinees - auto ErrorOrCurChildrenToProcess = readUnsignedNumber(); - if (!ErrorOrCurChildrenToProcess) + uint32_t Index = std::move(*ErrorOrIndex); + // Read type | flag. + auto ErrorOrValue = readUnencodedNumber(); + if (!ErrorOrValue) return false; - Cur->ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess); - // Read all probes in this node - for (std::size_t I = 0; I < NodeCount; I++) { - // Read index - auto ErrorOrIndex = readUnsignedNumber(); - if (!ErrorOrIndex) + uint8_t Value = std::move(*ErrorOrValue); + uint8_t Kind = Value & 0xf; + uint8_t Attr = (Value & 0x70) >> 4; + // Read address + uint64_t Addr = 0; + if (Value & 0x80) { + auto ErrorOrOffset = readSignedNumber(); + if (!ErrorOrOffset) return false; - uint32_t Index = std::move(*ErrorOrIndex); - // Read type | flag.
- auto ErrorOrValue = readUnencodedNumber(); - if (!ErrorOrValue) + int64_t Offset = std::move(*ErrorOrOffset); + Addr = LastAddr + Offset; + } else { + auto ErrorOrAddr = readUnencodedNumber(); + if (!ErrorOrAddr) return false; - uint8_t Value = std::move(*ErrorOrValue); - uint8_t Kind = Value & 0xf; - uint8_t Attr = (Value & 0x70) >> 4; - // Read address - uint64_t Addr = 0; - if (Value & 0x80) { - auto ErrorOrOffset = readSignedNumber(); - if (!ErrorOrOffset) - return false; - int64_t Offset = std::move(*ErrorOrOffset); - Addr = LastAddr + Offset; - } else { - auto ErrorOrAddr = readUnencodedNumber(); - if (!ErrorOrAddr) - return false; - Addr = std::move(*ErrorOrAddr); - } + Addr = std::move(*ErrorOrAddr); + } + + if (Cur) { // Populate Address2ProbesMap auto &Probes = Address2ProbesMap[Addr]; Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr, Cur); Cur->addProbes(&Probes.back()); - LastAddr = Addr; } + LastAddr = Addr; + } - // Look for the parent for the next node by subtracting the current - // node count from tree counts along the parent chain. The first node - // in the chain that has a non-zero tree count is the target.
- while (Cur != Root) { - if (Cur->ChildrenToProcess == 0) { - Cur = static_cast(Cur->Parent); - if (Cur != Root) { - assert(Cur->ChildrenToProcess > 0 && - "Should have some unprocessed nodes"); - Cur->ChildrenToProcess -= 1; - } - } else { - break; - } - } + uint32_t ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess); + for (uint32_t I = 0; I < ChildrenToProcess; I++) { + if (!buildAddress2ProbeMap(Cur, LastAddr, GuildFilter)) + return false; } + return true; +} + +bool MCPseudoProbeDecoder::buildAddress2ProbeMap( + const uint8_t *Start, std::size_t Size, + std::unordered_set &GuildFilter) { + Data = Start; + End = Data + Size; + uint64_t LastAddr = 0; + // Propagate decode failures instead of silently dropping them. + while (Data < End) + if (!buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuildFilter)) + return false; assert(Data == End && "Have unprocessed data in pseudo_probe section"); - assert(Cur == Root && - " Cur should point to root when the forest is fully built up"); return true; } +bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start, + std::size_t Size) { + std::unordered_set GuildFilter; + return buildAddress2ProbeMap(Start, Size, GuildFilter); +} + void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) { OS << "Pseudo Probe Desc:\n"; // Make the output deterministic diff --git a/llvm/test/tools/llvm-profgen/cs-extbinary.test b/llvm/test/tools/llvm-profgen/cs-extbinary.test --- a/llvm/test/tools/llvm-profgen/cs-extbinary.test +++ b/llvm/test/tools/llvm-profgen/cs-extbinary.test @@ -1,5 +1,5 @@ ; test for dwarf-based cs profile -; RUN: llvm-profgen --format=extbinary --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t1 --profile-summary-hot-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 +; RUN: llvm-profgen --format=extbinary --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t1 --profile-summary-hot-count=0 --csspgo-preinliner=0
--gen-cs-nested-profile=0 --decode-probe-for-profiled-functions-only=0 ; RUN: llvm-profdata merge --sample --text --output=%t2 %t1 ; RUN: FileCheck %S/recursion-compression-noprobe.test --input-file %t2 ; RUN: llvm-profdata merge --sample --extbinary --output=%t3 %t2 && llvm-profdata merge --sample --text --output=%t4 %t3 @@ -7,7 +7,7 @@ ; test for probe-based cs profile -; RUN: llvm-profgen --format=extbinary --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t5 --profile-summary-hot-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 +; RUN: llvm-profgen --format=extbinary --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t5 --profile-summary-hot-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 --decode-probe-for-profiled-functions-only=0 ; RUN: llvm-profdata merge --sample --text --output=%t6 %t5 ; RUN: FileCheck %S/recursion-compression-pseudoprobe.test --input-file %t6 ; RUN: llvm-profdata merge --sample --extbinary --output=%t7 %t6 && llvm-profdata merge --sample --text --output=%t8 %t7 diff --git a/llvm/test/tools/llvm-profgen/merge-cold-profile.test b/llvm/test/tools/llvm-profgen/merge-cold-profile.test --- a/llvm/test/tools/llvm-profgen/merge-cold-profile.test +++ b/llvm/test/tools/llvm-profgen/merge-cold-profile.test @@ -1,17 +1,17 @@ ; Used the data from recursion-compression.test, refer it for the unmerged output -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t1 --compress-recursion=-1 --profile-summary-hot-count=8 --csspgo-preinliner=0 --gen-cs-nested-profile=0 +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t1 
--compress-recursion=-1 --profile-summary-hot-count=8 --csspgo-preinliner=0 --gen-cs-nested-profile=0 --decode-probe-for-profiled-functions-only=0 ; RUN: FileCheck %s --input-file %t1 ; Test --trim-cold-profile=0 -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --trim-cold-profile=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --trim-cold-profile=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 --decode-probe-for-profiled-functions-only=0 ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-KEEP-COLD ; Test --csprof-merge-cold-context=0 -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t3 --compress-recursion=-1 --profile-summary-hot-count=10 --csprof-merge-cold-context=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t3 --compress-recursion=-1 --profile-summary-hot-count=10 --csprof-merge-cold-context=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 --decode-probe-for-profiled-functions-only=0 ; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-UNMERGED ; Test --csprof-frame-depth-for-cold-context -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 
--trim-cold-profile=0 --csprof-max-cold-context-depth=2 --csspgo-preinliner=0 --gen-cs-nested-profile=0 +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-hot-count=100 --trim-cold-profile=0 --csprof-max-cold-context-depth=2 --csspgo-preinliner=0 --gen-cs-nested-profile=0 --decode-probe-for-profiled-functions-only=0 ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-COLD-CONTEXT-LENGTH ; CHECK: [fa]:14:4 diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe-on-demand.test copy from llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test copy to llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe-on-demand.test --- a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test +++ b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe-on-demand.test @@ -1,14 +1,10 @@ -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --skip-symbolization --profile-summary-cold-count=0 -; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 --decode-probe-for-profiled-functions-only=1 ; RUN: FileCheck %s --input-file %t -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.aggperfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin 
--output=%t --skip-symbolization --profile-summary-cold-count=0 -; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.aggperfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.aggperfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 --decode-probe-for-profiled-functions-only=1 ; RUN: FileCheck %s --input-file %t -; CHECK: [main:2 @ foo]:75:0 +; CHECK: [foo]:75:0 ; CHECK-NEXT: 1: 0 ; CHECK-NEXT: 2: 15 ; CHECK-NEXT: 3: 15 @@ -19,25 +15,11 @@ ; CHECK-NEXT: 8: 15 bar:15 ; CHECK-NEXT: 9: 0 ; CHECK-NEXT: !CFGChecksum: 563088904013236 -; CHECK:[main:2 @ foo:8 @ bar]:30:15 +; CHECK:[foo:8 @ bar]:30:15 ; CHECK-NEXT: 1: 15 ; CHECK-NEXT: 4: 15 ; CHECK-NEXT: !CFGChecksum: 72617220756 -; CHECK-UNWINDER: [main:2] -; CHECK-UNWINDER-NEXT: 2 -; CHECK-UNWINDER-NEXT: 79e-7bf:15 -; CHECK-UNWINDER-NEXT: 7c4-7cf:15 -; CHECK-UNWINDER-NEXT: 2 -; CHECK-UNWINDER-NEXT: 7bf->760:15 -; CHECK-UNWINDER-NEXT: 7cf->79e:16 -; CHECK-UNWINDER-NEXT: [main:2 @ foo:8] -; CHECK-UNWINDER-NEXT: 1 -; CHECK-UNWINDER-NEXT: 760-77f:15 -; CHECK-UNWINDER-NEXT: 1 -; CHECK-UNWINDER-NEXT: 77f->7c4:17 - - ; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling ; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls ; -fno-inline-functions -g test.c -o a.out diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test --- a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test +++ b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test @@ -1,10 +1,10 @@ ; RUN: llvm-profgen --format=text 
--perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --skip-symbolization --profile-summary-cold-count=0 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 --decode-probe-for-profiled-functions-only=0 ; RUN: FileCheck %s --input-file %t ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.aggperfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --skip-symbolization --profile-summary-cold-count=0 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.aggperfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-pseudoprobe.aggperfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 --decode-probe-for-profiled-functions-only=0 ; RUN: FileCheck %s --input-file %t @@ -24,14 +24,14 @@ ; CHECK-NEXT: 4: 15 ; CHECK-NEXT: !CFGChecksum: 72617220756 -; CHECK-UNWINDER: [main:2] +; CHECK-UNWINDER: [0x7f4] ; CHECK-UNWINDER-NEXT: 2 ; CHECK-UNWINDER-NEXT: 79e-7bf:15 ; CHECK-UNWINDER-NEXT: 7c4-7cf:15 ; CHECK-UNWINDER-NEXT: 2 ; CHECK-UNWINDER-NEXT: 7bf->760:15 ; CHECK-UNWINDER-NEXT: 7cf->79e:16 -; CHECK-UNWINDER-NEXT: [main:2 @ foo:8] +; 
CHECK-UNWINDER-NEXT: [0x7f4 @ 0x7bf] ; CHECK-UNWINDER-NEXT: 1 ; CHECK-UNWINDER-NEXT: 760-77f:15 ; CHECK-UNWINDER-NEXT: 1 diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test --- a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test +++ b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test @@ -1,15 +1,17 @@ ; Firstly test uncompression(--compress-recursion=0) -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-hot-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-hot-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 --decode-probe-for-profiled-functions-only=0 ; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-hot-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 --decode-probe-for-profiled-functions-only=1 +; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS-OD ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --skip-symbolization --profile-summary-hot-count=0 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript 
--binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --profile-summary-hot-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --profile-summary-hot-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 --decode-probe-for-profiled-functions-only=0 ; RUN: FileCheck %s --input-file %t ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe-nommap.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --skip-symbolization --profile-summary-hot-count=0 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe-nommap.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --profile-summary-hot-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe-nommap.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --profile-summary-hot-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 --decode-probe-for-profiled-functions-only=0 ; RUN: FileCheck %s --input-file %t -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-hot-count=0 --csprof-max-context-depth=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-hot-count=0 --csprof-max-context-depth=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 
--decode-probe-for-profiled-functions-only=0 ; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-MAX-CTX-DEPTH @@ -68,6 +70,63 @@ ; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:1:0 ; CHECK-UNCOMPRESS: 5: 1 fb:1 ; CHECK-UNCOMPRESS: !CFGChecksum: 563022570642068 + +; CHECK-UNCOMPRESS-OD: [fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa]:4:1 +; CHECK-UNCOMPRESS-OD: 1: 1 +; CHECK-UNCOMPRESS-OD: 3: 1 +; CHECK-UNCOMPRESS-OD: 5: 1 +; CHECK-UNCOMPRESS-OD: 8: 1 fa:1 +; CHECK-UNCOMPRESS-OD: !CFGChecksum: 563070469352221 +; CHECK-UNCOMPRESS-OD: [fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa]:4:1 +; CHECK-UNCOMPRESS-OD: 1: 1 +; CHECK-UNCOMPRESS-OD: 3: 1 +; CHECK-UNCOMPRESS-OD: 4: 1 +; CHECK-UNCOMPRESS-OD: 7: 1 fb:1 +; CHECK-UNCOMPRESS-OD: !CFGChecksum: 563070469352221 +; CHECK-UNCOMPRESS-OD: [fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:4:1 +; CHECK-UNCOMPRESS-OD: 1: 1 +; CHECK-UNCOMPRESS-OD: 3: 1 +; CHECK-UNCOMPRESS-OD: 4: 1 +; CHECK-UNCOMPRESS-OD: 7: 1 fb:1 +; CHECK-UNCOMPRESS-OD: !CFGChecksum: 563070469352221 +; CHECK-UNCOMPRESS-OD: [fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1 +; CHECK-UNCOMPRESS-OD: 1: 1 +; CHECK-UNCOMPRESS-OD: 2: 1 +; CHECK-UNCOMPRESS-OD: 5: 1 fb:1 +; CHECK-UNCOMPRESS-OD: !CFGChecksum: 563022570642068 +; CHECK-UNCOMPRESS-OD: [fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1 +; CHECK-UNCOMPRESS-OD: 1: 1 +; CHECK-UNCOMPRESS-OD: 2: 1 +; CHECK-UNCOMPRESS-OD: 5: 1 fb:1 +; CHECK-UNCOMPRESS-OD: !CFGChecksum: 563022570642068 +; CHECK-UNCOMPRESS-OD: [fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1 +; CHECK-UNCOMPRESS-OD: 1: 1 +; CHECK-UNCOMPRESS-OD: 2: 1 +; CHECK-UNCOMPRESS-OD: 5: 1 fb:1 +; CHECK-UNCOMPRESS-OD: !CFGChecksum: 563022570642068 +; CHECK-UNCOMPRESS-OD: [fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1 +; 
CHECK-UNCOMPRESS-OD: 1: 1 +; CHECK-UNCOMPRESS-OD: 3: 1 +; CHECK-UNCOMPRESS-OD: 6: 1 fa:1 +; CHECK-UNCOMPRESS-OD: !CFGChecksum: 563022570642068 +; CHECK-UNCOMPRESS-OD: [fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb]:3:1 +; CHECK-UNCOMPRESS-OD: 1: 1 +; CHECK-UNCOMPRESS-OD: 3: 1 +; CHECK-UNCOMPRESS-OD: 6: 1 fa:1 +; CHECK-UNCOMPRESS-OD: !CFGChecksum: 563022570642068 +; CHECK-UNCOMPRESS-OD: [fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb]:3:1 +; CHECK-UNCOMPRESS-OD: 1: 1 +; CHECK-UNCOMPRESS-OD: 3: 1 +; CHECK-UNCOMPRESS-OD: 6: 1 fa:1 +; CHECK-UNCOMPRESS-OD: !CFGChecksum: 563022570642068 +; CHECK-UNCOMPRESS-OD: [fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb:6 @ fa]:2:1 +; CHECK-UNCOMPRESS-OD: 1: 1 +; CHECK-UNCOMPRESS-OD: 3: 1 +; CHECK-UNCOMPRESS-OD: !CFGChecksum: 563070469352221 +; CHECK-UNCOMPRESS-OD: [fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:1:0 +; CHECK-UNCOMPRESS-OD: 5: 1 fb:1 +; CHECK-UNCOMPRESS-OD: !CFGChecksum: 563022570642068 + ; CHECK-MAX-CTX-DEPTH: [fb]:19:6 ; CHECK-MAX-CTX-DEPTH: 1: 6 ; CHECK-MAX-CTX-DEPTH: 2: 3 @@ -123,7 +182,7 @@ ; CHECK: 6: 1 fa:1 ; CHECK: !CFGChecksum: 563022570642068 -; CHECK-UNWINDER: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5] +; CHECK-UNWINDER: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab] ; CHECK-UNWINDER-NEXT: 3 ; CHECK-UNWINDER-NEXT: 7a0-7a7:1 ; CHECK-UNWINDER-NEXT: 7a0-7ab:3 @@ -132,33 +191,33 @@ ; CHECK-UNWINDER-NEXT: 7a7->7b2:1 ; CHECK-UNWINDER-NEXT: 7ab->7a0:4 ; CHECK-UNWINDER-NEXT: 7b5->7c0:1 -; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6] +; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5] ; CHECK-UNWINDER-NEXT: 1 ; CHECK-UNWINDER-NEXT: 7c0-7d4:1 ; CHECK-UNWINDER-NEXT: 1 ; CHECK-UNWINDER-NEXT: 7d4->7c0:1 -; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8] +; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5 @ 
0x7d4] ; CHECK-UNWINDER-NEXT: 2 ; CHECK-UNWINDER-NEXT: 7c0-7cd:1 ; CHECK-UNWINDER-NEXT: 7db-7e0:1 ; CHECK-UNWINDER-NEXT: 2 ; CHECK-UNWINDER-NEXT: 7cd->7db:1 ; CHECK-UNWINDER-NEXT: 7e0->7a0:1 -; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7] +; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5 @ 0x7d4 @ 0x7e0] ; CHECK-UNWINDER-NEXT: 2 ; CHECK-UNWINDER-NEXT: 7a0-7a7:1 ; CHECK-UNWINDER-NEXT: 7b2-7b5:1 ; CHECK-UNWINDER-NEXT: 2 ; CHECK-UNWINDER-NEXT: 7a7->7b2:1 ; CHECK-UNWINDER-NEXT: 7b5->7c0:1 -; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6] +; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5 @ 0x7d4 @ 0x7e0 @ 0x7b5] ; CHECK-UNWINDER-NEXT: 2 ; CHECK-UNWINDER-NEXT: 7c0-7cd:2 ; CHECK-UNWINDER-NEXT: 7db-7e0:1 ; CHECK-UNWINDER-NEXT: 2 ; CHECK-UNWINDER-NEXT: 7cd->7db:2 ; CHECK-UNWINDER-NEXT: 7e0->7a0:1 -; CHECK-UNWINDER-NEXT: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7] +; CHECK-UNWINDER-NEXT: [0x842 @ 0x7d4 @ 0x7e0 @ 0x7ab @ 0x7b5 @ 0x7d4 @ 0x7e0 @ 0x7b5 @ 0x7e0] ; CHECK-UNWINDER-NEXT: 2 ; CHECK-UNWINDER-NEXT: 7a0-7a7:1 ; CHECK-UNWINDER-NEXT: 7b2-7b5:1 diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h --- a/llvm/tools/llvm-profgen/PerfReader.h +++ b/llvm/tools/llvm-profgen/PerfReader.h @@ -333,7 +333,7 @@ }; // Utilities for LLVM-style RTTI - enum ContextKind { CK_StringBased, CK_ProbeBased }; + enum ContextKind { CK_StringBased, CK_ProbeBased, CK_AddrBased }; const ContextKind Kind; ContextKind getKind() const { return Kind; } ContextKey(ContextKind K) : Kind(K){}; @@ -359,34 +359,23 @@ } }; -// Probe based context key as the intermediate key of context -// String based context key will introduce redundant string handling -// since the callee context is inferred from the context string which -// need to be splitted by '@' to get the last location frame, so we -// can just use probe instead and generate the string 
in the end. -struct ProbeBasedCtxKey : public ContextKey { - SmallVector Probes; +// Address-based context id +struct AddrBasedCtxKey : public ContextKey { + SmallVector Context; - ProbeBasedCtxKey() : ContextKey(CK_ProbeBased) {} + bool WasLeafInlined; + AddrBasedCtxKey() : ContextKey(CK_AddrBased), WasLeafInlined(false){}; static bool classof(const ContextKey *K) { - return K->getKind() == CK_ProbeBased; + return K->getKind() == CK_AddrBased; } bool isEqual(const ContextKey *K) const override { - const ProbeBasedCtxKey *O = dyn_cast(K); - assert(O != nullptr && "Probe based key shouldn't be null in isEqual"); - return std::equal(Probes.begin(), Probes.end(), O->Probes.begin(), - O->Probes.end()); + const AddrBasedCtxKey *Other = dyn_cast(K); + return Other && Context == Other->Context; } void genHashCode() override { - for (const auto *P : Probes) { - HashCode = hash_combine(HashCode, P); - } - if (HashCode == 0) { - // Avoid zero value of HashCode when it's an empty list - HashCode = 1; - } + HashCode = hash_combine_range(Context.begin(), Context.end()); } }; @@ -433,22 +422,14 @@ std::shared_ptr getContextKey(); }; -struct ProbeStack { - SmallVector Stack; +struct AddressStack { + SmallVector Stack; ProfiledBinary *Binary; - ProbeStack(ProfiledBinary *B) : Binary(B) {} + AddressStack(ProfiledBinary *B) : Binary(B) {} bool pushFrame(UnwindState::ProfiledFrame *Cur) { assert(!Cur->isExternalFrame() && "External frame's not expected for context stack."); - const MCDecodedPseudoProbe *CallProbe = - Binary->getCallProbeForAddr(Cur->Address); - // We may not find a probe for a merged or external callsite. - // Callsite merging may cause the loss of original probe IDs. - // Cutting off the context from here since the inliner will - // not know how to consume a context with unknown callsites.
- if (!CallProbe) - return false; - Stack.push_back(CallProbe); + Stack.push_back(Cur->Address); return true; } @@ -456,18 +437,9 @@ if (!Stack.empty()) Stack.pop_back(); } - // Use pseudo probe based context key to get the sample counter - // A context stands for a call path from 'main' to an uninlined - // callee with all inline frames recovered on that path. The probes - // belonging to that call path is the probes either originated from - // the callee or from any functions inlined into the callee. Since - // pseudo probes are organized in a tri-tree style after decoded, - // the tree path from the tri-tree root (which is the uninlined - // callee) to the probe node forms an inline context. - // Here we use a list of probe(pointer) as the context key to speed up - // aggregation and the final context string will be generate in - // ProfileGenerator - std::shared_ptr getContextKey(); + // Build the context key from the stacked frame addresses, after recursion + // compression and context trimming have been applied to the copy. + std::shared_ptr getContextKey(); }; /* diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -179,17 +179,12 @@ return KeyStr; } -std::shared_ptr ProbeStack::getContextKey() { - std::shared_ptr ProbeBasedKey = - std::make_shared(); - for (auto CallProbe : Stack) { - ProbeBasedKey->Probes.emplace_back(CallProbe); - } - CSProfileGenerator::compressRecursionContext( - ProbeBasedKey->Probes); - CSProfileGenerator::trimContext( - ProbeBasedKey->Probes); - return ProbeBasedKey; +std::shared_ptr AddressStack::getContextKey() { + std::shared_ptr KeyStr = std::make_shared(); + KeyStr->Context = Stack; + CSProfileGenerator::compressRecursionContext(KeyStr->Context); + CSProfileGenerator::trimContext(KeyStr->Context); + return KeyStr; } template @@ -252,8 +247,8 @@ void VirtualUnwinder::collectSamplesFromFrameTrie( UnwindState::ProfiledFrame *Cur) { if (Binary->usePseudoProbes()) { - ProbeStack Stack(Binary); - collectSamplesFromFrameTrie(Cur, Stack); +
AddressStack Stack(Binary); + collectSamplesFromFrameTrie(Cur, Stack); } else { FrameStack Stack(Binary); collectSamplesFromFrameTrie(Cur, Stack); @@ -461,14 +456,17 @@ const ProfiledBinary *Binary) { if (const auto *CtxKey = dyn_cast(K)) { return SampleContext::getContextString(CtxKey->Context); - } else if (const auto *CtxKey = dyn_cast(K)) { - SampleContextFrameVector ContextStack; - for (const auto *Probe : CtxKey->Probes) { - Binary->getInlineContextForProbe(Probe, ContextStack, true); + } else if (const auto *CtxKey = dyn_cast(K)) { + std::ostringstream OContextStr; + for (uint32_t I = 0; I < CtxKey->Context.size(); I++) { + if (I != 0) + OContextStr << " @ "; + OContextStr << "0x" + << to_hexString( + Binary->virtualAddrToOffset(CtxKey->Context[I]), + false); } - // Probe context key at this point does not have leaf probe, so do not - // include the leaf inline location. - return SampleContext::getContextString(ContextStack, true); + return OContextStr.str(); } else { llvm_unreachable("unexpected key type"); } diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -105,6 +105,8 @@ void showDensitySuggestion(double Density); + void collectProfiledFunctions(); + // Thresholds from profile summary to answer isHotCount/isColdCount queries.
uint64_t HotCountThreshold;
 
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -5,12 +5,12 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-
 #include "ProfileGenerator.h"
 #include "ErrorHandling.h"
 #include "ProfiledBinary.h"
 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
 #include "llvm/ProfileData/ProfileCommon.h"
+#include
 #include
 #include
 
@@ -89,6 +89,7 @@
     llvm::cl::Optional);
 
 extern cl::opt ProfileSummaryCutoffHot;
+extern cl::opt DecodeProbeForProfiledFunctionsOnly;
 
 static cl::opt GenCSNestedProfile(
     "gen-cs-nested-profile", cl::Hidden, cl::init(true),
@@ -370,6 +371,30 @@
   }
 }
 
+void ProfileGeneratorBase::collectProfiledFunctions() {
+  std::unordered_set ProfiledFunctions;
+  // Record every function that contains the start of a sampled range, or the
+  // source or the target of a sampled branch, then hand the set to Binary.
+  for (const auto &CI : SampleCounters) {
+    for (const auto &Item : CI.second.RangeCounter) {
+      uint64_t StartOffset = Item.first.first;
+      if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset))
+        ProfiledFunctions.insert(FRange->Func);
+    }
+
+    for (const auto &Item : CI.second.BranchCounter) {
+      uint64_t SourceOffset = Item.first.first;
+      uint64_t TargetOffset = Item.first.second;
+      if (FuncRange *FRange = Binary->findFuncRangeForOffset(SourceOffset))
+        ProfiledFunctions.insert(FRange->Func);
+      if (FuncRange *FRange = Binary->findFuncRangeForOffset(TargetOffset))
+        ProfiledFunctions.insert(FRange->Func);
+    }
+  }
+
+  Binary->setProfiledFunctions(ProfiledFunctions);
+}
+
 FunctionSamples &
 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
   SampleContext Context(FuncName);
@@ -382,6 +407,7 @@
 }
 
 void ProfileGenerator::generateProfile() {
+  collectProfiledFunctions();
   if (Binary->usePseudoProbes()) {
     generateProbeBasedProfile();
   } else {
@@ -428,6 +454,9 @@
 void ProfileGenerator::generateProbeBasedProfile() {
   assert(SampleCounters.size() == 1 &&
          "Must have one entry for profile generation.");
+  // Decode pseudo probe for profiled functions only.
+  if (DecodeProbeForProfiledFunctionsOnly)
+    Binary->decodePseudoProbe();
   // Enable pseudo probe functionalities in SampleProf
   FunctionSamples::ProfileIsProbeBased = true;
   const SampleCounter &SC = SampleCounters.begin()->second;
@@ -442,16 +471,18 @@
 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
     const RangeSample &RangeCounter) {
   ProbeCounterMap ProbeCounter;
-  // preprocessRangeCounter returns disjoint ranges, so no longer to redo it inside
-  // extractProbesFromRange.
-  extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter, false);
+  // preprocessRangeCounter returns disjoint ranges, so no longer to redo it
+  // inside extractProbesFromRange.
+  extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter,
+                         false);
 
   for (const auto &PI : ProbeCounter) {
     const MCDecodedPseudoProbe *Probe = PI.first;
     uint64_t Count = PI.second;
     SampleContextFrameVector FrameVec;
     Binary->getInlineContextForProbe(Probe, FrameVec, true);
-    FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(FrameVec, Count);
+    FunctionSamples &FunctionProfile =
+        getLeafProfileAndAddTotalSamples(FrameVec, Count);
     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
     if (Probe->isEntry())
       FunctionProfile.addHeadSamples(Count);
@@ -496,7 +527,8 @@
           &getTopLevelFunctionProfile(FrameVec[0].FuncName);
       FunctionProfile->addTotalSamples(Count);
       if (Binary->usePseudoProbes()) {
-        const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName()));
+        const auto *FuncDesc = Binary->getFuncDescForGUID(
+            Function::getGUID(FunctionProfile->getName()));
         FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
       }
 
@@ -515,7 +547,8 @@
       FunctionProfile = &Ret.first->second;
       FunctionProfile->addTotalSamples(Count);
       if (Binary->usePseudoProbes()) {
-        const auto *FuncDesc = Binary->getFuncDescForGUID(Function::getGUID(FunctionProfile->getName()));
+        const auto *FuncDesc = Binary->getFuncDescForGUID(
+            Function::getGUID(FunctionProfile->getName()));
         FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
       }
     }
@@ -646,32 +679,22 @@
 void CSProfileGenerator::generateProfile() {
   FunctionSamples::ProfileIsCSFlat = true;
 
-  if (Binary->getTrackFuncContextSize())
-    computeSizeForProfiledFunctions();
+  collectProfiledFunctions();
 
   if (Binary->usePseudoProbes()) {
     generateProbeBasedProfile();
   } else {
     generateLineNumBasedProfile();
   }
+
+  if (Binary->getTrackFuncContextSize())
+    computeSizeForProfiledFunctions();
+
   postProcessProfiles();
 }
 
 void CSProfileGenerator::computeSizeForProfiledFunctions() {
-  std::unordered_set ProfiledFunctions;
-
-  // Go through all the ranges in the CS counters, use the start of the range to
-  // look up the function it belongs and record the function.
-  for (const auto &CI : SampleCounters) {
-    for (const auto &Item : CI.second.RangeCounter) {
-      // FIXME: Filter the bogus crossing function range.
-      uint64_t StartOffset = Item.first.first;
-      if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset))
-        ProfiledFunctions.insert(FRange->Func);
-    }
-  }
-
-  for (auto *Func : ProfiledFunctions)
+  for (auto *Func : Binary->getProfiledFunctions())
     Binary->computeInlinedContextSizeForFunc(Func);
 
   // Flush the symbolizer to save memory.
@@ -919,13 +942,44 @@
   }
 }
 
+static void
+extractPrefixContextStack(SampleContextFrameVector &ContextStack,
+                          const SmallVectorImpl &Addresses,
+                          ProfiledBinary *Binary) {
+  SmallVector Probes;
+  for (auto Addr : reverse(Addresses)) {
+    const MCDecodedPseudoProbe *CallProbe = Binary->getCallProbeForAddr(Addr);
+    // We may not find a probe for functions that are not sampled. Also, we may
+    // not find a probe for a merged or external callsite. Callsite merging may
+    // cause the loss of original probe IDs. Cutting off the context from here
+    // since the inliner will not know how to consume a context with unknown
+    // callsites.
+    if (!CallProbe)
+      break;
+    Probes.push_back(CallProbe);
+  }
+
+  std::reverse(Probes.begin(), Probes.end());
+
+  // Extract context stack for reusing, leaf context stack will be added
+  // compressed while looking up function profile.
+  for (const auto *P : Probes) {
+    Binary->getInlineContextForProbe(P, ContextStack, true);
+  }
+}
+
 void CSProfileGenerator::generateProbeBasedProfile() {
+  // On-demand decode pseudo probe.
+  if (DecodeProbeForProfiledFunctionsOnly)
+    Binary->decodePseudoProbe();
+
   // Enable pseudo probe functionalities in SampleProf
   FunctionSamples::ProfileIsProbeBased = true;
   for (const auto &CI : SampleCounters) {
-    const auto *CtxKey = cast(CI.first.getPtr());
+    const AddrBasedCtxKey *CtxKey =
+        cast<AddrBasedCtxKey>(CI.first.getPtr());
     SampleContextFrameVector ContextStack;
-    extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary);
+    extractPrefixContextStack(ContextStack, CtxKey->Context, Binary);
     // Fill in function body samples from probes, also infer caller's samples
     // from callee's probe
     populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -218,6 +218,9 @@
   // A map of mapping function name to BinaryFunction info.
   std::unordered_map BinaryFunctions;
 
+  // The set of binary functions that have samples.
+  std::unordered_set ProfiledFunctions;
+
   // An ordered map of mapping function's start offset to function range
   // relevant info. Currently to determine if the offset of ELF is the start of
   // a real function, we leverage the function range info from DWARF.
@@ -278,6 +281,8 @@
   template
   void setPreferredTextSegmentAddresses(const ELFFile &Obj, StringRef FileName);
 
+  void checkPseudoProbe(const ELFObjectFileBase *Obj);
+
   void decodePseudoProbe(const ELFObjectFileBase *Obj);
 
   void
@@ -331,6 +336,9 @@
     setupSymbolizer();
     load();
   }
+
+  void decodePseudoProbe();
+
   uint64_t virtualAddrToOffset(uint64_t VirtualAddress) const {
     return VirtualAddress - BaseAddress;
   }
@@ -453,6 +461,14 @@
     return BinaryFunctions;
   }
 
+  std::unordered_set &getProfiledFunctions() {
+    return ProfiledFunctions;
+  }
+
+  void setProfiledFunctions(const std::unordered_set &Funcs) {
+    ProfiledFunctions = Funcs; // Copies; the caller's set is left untouched.
+  }
+
   BinaryFunction *getBinaryFunction(StringRef FName) {
     auto I = BinaryFunctions.find(FName.str());
     if (I == BinaryFunctions.end())
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -54,6 +54,10 @@
     cl::desc("List of functions to print disassembly for. Accept demangled "
              "names only. Only work with show-disassembly-only"));
 
+cl::opt DecodeProbeForProfiledFunctionsOnly(
+    "decode-probe-for-profiled-functions-only", cl::init(true), cl::ZeroOrMore,
+    cl::desc("Decode pseudo probe for profiled functions only."));
+
 extern cl::opt ShowDetailedWarning;
 
 namespace llvm {
@@ -208,8 +212,11 @@
   // Find the preferred load address for text sections.
   setPreferredTextSegmentAddresses(Obj);
 
-  // Decode pseudo probe related section
-  decodePseudoProbe(Obj);
+  checkPseudoProbe(Obj);
+
+  // Decode pseudo probe related section if not in on-demand mode.
+  if (!DecodeProbeForProfiledFunctionsOnly || ShowDisassemblyOnly)
+    decodePseudoProbe(Obj);
 
   // Load debug info of subprograms from DWARF section.
// If path of debug info binary is specified, use the debug info from it,
@@ -324,10 +331,38 @@
   llvm_unreachable("invalid ELF object format");
 }
 
-void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
+void ProfiledBinary::checkPseudoProbe(const ELFObjectFileBase *Obj) {
   if (UseDwarfCorrelation)
     return;
 
+  bool HasProbeDescSection = false;
+  bool HasPseudoProbeSection = false;
+
+  StringRef FileName = Obj->getFileName();
+  for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
+       SI != SE; ++SI) {
+    const SectionRef &Section = *SI;
+    StringRef SectionName = unwrapOrError(Section.getName(), FileName);
+    if (SectionName == ".pseudo_probe_desc") {
+      HasProbeDescSection = true;
+    } else if (SectionName == ".pseudo_probe") {
+      HasPseudoProbeSection = true;
+    }
+  }
+
+  // set UsePseudoProbes flag, used for PerfReader
+  UsePseudoProbes = HasProbeDescSection && HasPseudoProbeSection;
+}
+
+void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
+  if (!UsePseudoProbes)
+    return;
+
+  std::unordered_set ProfiledGuids;
+  for (auto *F : ProfiledFunctions) {
+    ProfiledGuids.insert(Function::getGUID(F->FuncName));
+  }
+
   StringRef FileName = Obj->getFileName();
   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
        SI != SE; ++SI) {
@@ -344,16 +379,14 @@
       StringRef Contents = unwrapOrError(Section.getContents(), FileName);
       if (!ProbeDecoder.buildAddress2ProbeMap(
               reinterpret_cast(Contents.data()),
-              Contents.size()))
+              Contents.size(), ProfiledGuids))
         exitWithError("Pseudo Probe decoder fail in .pseudo_probe section");
-      // set UsePseudoProbes flag, used for PerfReader
-      UsePseudoProbes = true;
     }
   }
 
   // Build TopLevelProbeFrameMap to track size for optimized inlinees when probe
   // is available
-  if (UsePseudoProbes && TrackFuncContextSize) {
+  if (TrackFuncContextSize) {
     for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) {
       auto *Frame = Child.second.get();
       StringRef FuncName =
@@ -366,6 +399,17 @@
     ProbeDecoder.printGUID2FuncDescMap(outs());
 }
 
+// Re-open the binary at Path and decode its pseudo probe sections, for callers
+// that do not hold the ELF object themselves.
+void ProfiledBinary::decodePseudoProbe() {
+  OwningBinary OBinary = unwrapOrError(createBinary(Path), Path);
+  Binary &ExeBinary = *OBinary.getBinary();
+  // cast<> rather than dyn_cast<>: the result is dereferenced unconditionally
+  // below, so a non-ELF binary must assert instead of yielding a null pointer.
+  auto *Obj = cast<ELFObjectFileBase>(&ExeBinary);
+  decodePseudoProbe(Obj);
+}
+
 void ProfiledBinary::setIsFuncEntry(uint64_t Offset, StringRef RangeSymName) {
   // Note that the start offset of each ELF section can be a non-function
   // symbol, we need to binary search for the start of a real function range.