diff --git a/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-noprobe.perfbin b/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-noprobe.perfbin new file mode 100755 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@ + +int fb(int n) { + if(n > 10) return fb(n / 2); + return fa(n - 1); +} + +int fa(int n) { + if(n < 2) return n; + if(n % 2) return fb(n - 1); + return fa(n - 1); +} + +void foo() { + int s, i = 0; + while (i++ < 10000) + s += fa(i); + printf("sum is %d\n", s); +} + +int main() { + foo(); + return 0; +} diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test @@ -0,0 +1,169 @@ +; Firstly test uncompression(--compress-recursion=0) +; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 +; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS +; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER +; RUN: FileCheck %s --input-file %t + +; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:4:1 +; CHECK-UNCOMPRESS: 1: 1 +; CHECK-UNCOMPRESS: 3: 1 +; CHECK-UNCOMPRESS: 4: 1 +; CHECK-UNCOMPRESS: 7: 1 fb:1 +; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909 +; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa]:4:1 +; CHECK-UNCOMPRESS: 1: 1 +; CHECK-UNCOMPRESS: 3: 1 +; CHECK-UNCOMPRESS: 4: 1 +; CHECK-UNCOMPRESS: 7: 1 fb:1 +; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909 +; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa]:4:1 +; CHECK-UNCOMPRESS: 1: 1 +; CHECK-UNCOMPRESS: 3: 1 +; CHECK-UNCOMPRESS: 5: 1 +; CHECK-UNCOMPRESS: 8: 1 fa:1 +; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909 +; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb]:3:1 +; CHECK-UNCOMPRESS: 1: 1 +; CHECK-UNCOMPRESS: 3: 1 +; CHECK-UNCOMPRESS: 6: 1 fa:1 +; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756 +; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb]:3:1 +; CHECK-UNCOMPRESS: 1: 1 +; CHECK-UNCOMPRESS: 3: 1 +; CHECK-UNCOMPRESS: 6: 1 fa:1 +; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756 +; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1 +; CHECK-UNCOMPRESS: 1: 1 +; CHECK-UNCOMPRESS: 3: 1 +; CHECK-UNCOMPRESS: 6: 1 fa:1 +; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756 +; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1 +; CHECK-UNCOMPRESS: 1: 1 +; CHECK-UNCOMPRESS: 2: 1 +; CHECK-UNCOMPRESS: 5: 1 fb:1 +; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756 +; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1 +; CHECK-UNCOMPRESS: 1: 1 +; CHECK-UNCOMPRESS: 2: 1 +; CHECK-UNCOMPRESS: 5: 1 fb:1 +; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756 +; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1 +; CHECK-UNCOMPRESS: 1: 1 +; CHECK-UNCOMPRESS: 2: 1 +; CHECK-UNCOMPRESS: 5: 1 fb:1 +; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756 +; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb:6 @ fa]:2:1 +; CHECK-UNCOMPRESS: 1: 1 +; CHECK-UNCOMPRESS: 3: 1 +; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909 +; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:1:0 +; CHECK-UNCOMPRESS: 5: 1 fb:1 +; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756 + + +; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb]:13:4 +; CHECK: 1: 4 +; CHECK: 2: 3 +; CHECK: 3: 1 +; CEHCK: 5: 4 fb:4 +; CHECK: 6: 1 fa:1 +; CHECK !CFGChecksum: 72617220756 +; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:6:2 +; CHECK: 1: 2 +; CHECK: 3: 2 +; CHECK: 4: 1 +; CHECK: 7: 1 fb:1 +; CHECK: !CFGChecksum: 120515930909 +; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa]:4:1 +; CHECK: 1: 1 +; CHECK: 3: 1 +; CHECK: 4: 1 +; CHECK: 7: 1 fb:1 +; CHECK: !CFGChecksum: 120515930909 +; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa]:4:1 +; CHECK: 1: 1 +; CHECK: 3: 1 +; CHECK: 5: 1 +; CHECK: 8: 1 fa:1 +; CHECK: !CFGChecksum: 120515930909 +; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb]:3:1 +; CHECK: 1: 1 +; CHECK: 3: 1 +; CHECK: 6: 1 fa:1 +; CHECK: !CFGChecksum: 72617220756 +; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb]:3:1 +; CHECK: 1: 1 +; CHECK: 3: 1 +; CHECK: 6: 1 fa:1 +; CHECK: !CFGChecksum: 72617220756 + + +; CHECK-UNWINDER: Binary(recursion-compression-pseudoprobe.perfbin)'s Range Counter: +; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 +; CHECK-UNWINDER: (7a0, 7a7): 1 +; CHECK-UNWINDER: (7a0, 7ab): 3 +; CHECK-UNWINDER: (7b2, 7b5): 1 +; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 +; CHECK-UNWINDER: (7c0, 7d4): 1 +; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 +; CHECK-UNWINDER: (7c0, 7cd): 1 +; CHECK-UNWINDER: (7db, 7e0): 1 +; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 +; CHECK-UNWINDER: (7a0, 7a7): 1 +; CHECK-UNWINDER: (7b2, 7b5): 1 +; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 +; CHECK-UNWINDER: (7c0, 7cd): 2 +; CHECK-UNWINDER: (7db, 7e0): 1 +; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 +; CHECK-UNWINDER: (7a0, 7a7): 1 +; CHECK-UNWINDER: (7b2, 7b5): 1 + +; CHECK-UNWINDER: Binary(recursion-compression-pseudoprobe.perfbin)'s Branch Counter: +; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 +; CHECK-UNWINDER: (7a7, 7b2): 1 +; CHECK-UNWINDER: (7ab, 7a0): 4 +; CHECK-UNWINDER: (7b5, 7c0): 1 +; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 +; CHECK-UNWINDER: (7d4, 7c0): 1 +; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 +; CHECK-UNWINDER: (7cd, 7db): 1 +; CHECK-UNWINDER: (7e0, 7a0): 1 +; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 +; CHECK-UNWINDER: (7a7, 7b2): 1 +; CHECK-UNWINDER: (7b5, 7c0): 1 +; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 +; CHECK-UNWINDER: (7cd, 7db): 2 +; CHECK-UNWINDER: (7e0, 7a0): 1 +; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 +; CHECK-UNWINDER: (7a7, 7b2): 1 +; CHECK-UNWINDER: (7b5, 7c0): 1 + + +; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling +; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls +; -g test.c -o a.out + +#include + +int fb(int n) { + if(n > 10) return fb(n / 2); + return fa(n - 1); +} + +int fa(int n) { + if(n < 2) return n; + if(n % 2) return fb(n - 1); + return fa(n - 1); +} + +void foo() { + int s, i = 0; + while (i++ < 10000) + s += fa(i); + printf("sum is %d\n", s); +} + +int main() { + foo(); + return 0; +} diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// #include "PerfReader.h" +#include "ProfileGenerator.h" static cl::opt ShowMmapEvents("show-mmap-events", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, @@ -124,6 +125,8 @@ ProbeBasedKey->Probes.emplace_back(CallProbe); } } + CSProfileGenerator::compressRecursionContext( + ProbeBasedKey->Probes); ProbeBasedKey->genHashCode(); Hashable ContextId(ProbeBasedKey); auto Ret = CtxCounterMap->emplace(ContextId, SampleCounter()); diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -50,6 +50,7 @@ */ void findDisjointRanges(RangeSample &DisjointRanges, const RangeSample &Ranges); + // Used by SampleProfileWriter StringMap ProfileMap; }; @@ -91,6 +92,110 @@ populateInferredFunctionSamples(); } + // Remove adjacent repeated context sequences up to a given sequence length, + // -1 means no size limit. Note that repeated sequences are identified based + // on the exact call site, this is finer granularity than function recursion. + template + static void compressRecursionContext(SmallVectorImpl &Context, + int32_t CSize = MaxCompressionSize) { + uint32_t I = 1; + uint32_t HS = static_cast(Context.size() / 2); + uint32_t MaxDedupSize = + CSize == -1 ? HS : std::min(static_cast(CSize), HS); + auto BeginIter = Context.begin(); + // Use an in-place algorithm to save memory copy + // End indicates the end location of current iteration's data + uint32_t End = 0; + // Deduplicate from length 1 to the max possible size of a repeated + // sequence. + while (I <= MaxDedupSize) { + // This is a linear algorithm that deduplicates adjacent repeated + // sequences of size I. The deduplication detection runs on a sliding + // window whose size is 2*I and it keeps sliding the window to deduplicate + // the data inside. Once duplication is detected, deduplicate it by + // skipping the right half part of the window, otherwise just copy back + // the new one by appending them at the back of End pointer(for the next + // iteration). + // + // For example: + // Input: [a1, a2, b1, b2] + // (Added index to distinguish the same char, the origin is [a, a, b, + // b], the size of the dedup window is 2(I = 1) at the beginning) + // + // 1) The initial status is a dummy window[null, a1], then just copy the + // right half of the window(End = 0), then slide the window. + // Result: [a1], a2, b1, b2 (End points to the element right before ], + // after ] is the data of the previous iteration) + // + // 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of + // the window i.e the duplication happen. Only slide the window. + // Result: [a1], a2, b1, b2 + // + // 3) Next window is [a2, b1], copy the right half of the window(b1 is + // new) to the End and slide the window. + // Result: [a1, b1], b1, b2 + // + // 4) Next window is [b1, b2], same to 2), skip b2. + // Result: [a1, b1], b1, b2 + // After resize, it will be [a, b] + + // Use pointers like below to do comparison inside the window + // [a b c a b c] + // | | | | | + // LeftBoundary Left Right Left+I Right+I + // A duplication found if Left < LeftBoundry. + + int32_t Right = I - 1; + End = I; + int32_t LeftBoundary = 0; + while (Right + I < Context.size()) { + // To avoids scanning a part of a sequence repeatedly, it finds out + // the common suffix of two hald in the window. The common suffix will + // serve as the common prefix of next possible pair of duplicate + // sequences. The non-common part will be ignored and never scanned + // again. + + // For example. + // Input: [a, b1], c1, b2, c2 + // I = 2 + // + // 1) For the window [a, b1, c1, b2], non-common-suffix for the right + // part is 'c1', copy it and only slide the window 1 step. + // Result: [a, b1, c1], b2, c2 + // + // 2) Next window is [b1, c1, b2, c2], so duplication happen. + // Result after resize: [a, b, c] + + int32_t Left = Right; + while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) { + // Find the longest suffix inside the window. When stops, Left points + // at the diverging point in the current sequence. + Left--; + } + + bool DuplicationFound = (Left < LeftBoundary); + // Don't need to recheck the data before Right + LeftBoundary = Right + 1; + if (DuplicationFound) { + // Duplication found, skip right half of the window. + Right += I; + } else { + // Copy the non-common-suffix part of the adjacent sequence. + std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1, + BeginIter + End); + End += Left + I - Right; + // Only slide the window by the size of non-common-suffix + Right = Left + I; + } + } + // Don't forget the remaining part that's not scanned. + std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End); + End += Context.size() - Right - 1; + I++; + Context.resize(End); + } + } + protected: // Lookup or create FunctionSamples for the context FunctionSamples &getFunctionProfileForContext(StringRef ContextId); @@ -109,6 +214,11 @@ const BranchSample &BranchCounters, ProfiledBinary *Binary); void populateInferredFunctionSamples(); + +public: + // Deduplicate adjacent repeated context sequences up to a given sequence + // length. -1 means no size limit. + static int32_t MaxCompressionSize; }; using ProbeCounterMap = std::unordered_map; @@ -127,22 +237,23 @@ ProbeCounterMap &ProbeCounter, ProfiledBinary *Binary); // Fill in function body samples from probes - void populateBodySamplesWithProbes(const RangeSample &RangeCounter, - StringRef PrefixContextId, - ProfiledBinary *Binary); + void + populateBodySamplesWithProbes(const RangeSample &RangeCounter, + SmallVectorImpl &ContextStrStack, + ProfiledBinary *Binary); // Fill in boundary samples for a call probe - void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter, - StringRef PrefixContextId, - ProfiledBinary *Binary); + void populateBoundarySamplesWithProbes( + const BranchSample &BranchCounter, + SmallVectorImpl &ContextStrStack, ProfiledBinary *Binary); // Helper function to get FunctionSamples for the leaf inlined context - FunctionSamples &getFunctionProfileForLeafProbe( - StringRef PrefixContextId, - SmallVector &LeafInlinedContext, - const PseudoProbeFuncDesc *LeafFuncDesc); + FunctionSamples & + getFunctionProfileForLeafProbe(SmallVectorImpl &ContextStrStack, + const PseudoProbeFuncDesc *LeafFuncDesc); // Helper function to get FunctionSamples for the leaf probe - FunctionSamples &getFunctionProfileForLeafProbe(StringRef PrefixContextId, - const PseudoProbe *LeafProbe, - ProfiledBinary *Binary); + FunctionSamples & + getFunctionProfileForLeafProbe(SmallVectorImpl &ContextStrStack, + const PseudoProbe *LeafProbe, + ProfiledBinary *Binary); }; } // end namespace sampleprof diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -22,12 +22,22 @@ clEnumValN(SPF_GCC, "gcc", "GCC encoding (only meaningful for -sample)"))); +static cl::opt RecursionCompression( + "compress-recursion", + cl::desc("Compressing recursion by deduplicating adjacent frame " + "sequences up to the specified size. -1 means no size limit."), + cl::Hidden, + cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); + using namespace llvm; using namespace sampleprof; namespace llvm { namespace sampleprof { +// Initialize the MaxCompressionSize to -1 which means no size limit +int32_t CSProfileGenerator::MaxCompressionSize = -1; + static bool usePseudoProbes(const BinarySampleCounterMap &BinarySampleCounters) { return BinarySampleCounters.size() && @@ -319,26 +329,16 @@ } } -// Helper function to extract context prefix -// PrefixContextId is the context id string except for the leaf probe's -// context, the final ContextId will be: -// ContextId = PrefixContextId + LeafContextId; -// Remind that the string in ContextStrStack is in callee-caller order -// So process the string vector reversely -static std::string -extractPrefixContextId(const SmallVector &Probes, - ProfiledBinary *Binary) { - SmallVector ContextStrStack; +// Helper function to extract context prefix string stack +// Extract context stack for reusing, leaf context stack will +// be added compressed while looking up function profile +static void +extractPrefixContextStack(SmallVectorImpl &ContextStrStack, + const SmallVectorImpl &Probes, + ProfiledBinary *Binary) { for (const auto *P : Probes) { Binary->getInlineContextForProbe(P, ContextStrStack, true); } - std::ostringstream OContextStr; - for (auto &CxtStr : ContextStrStack) { - if (OContextStr.str().size()) - OContextStr << " @ "; - OContextStr << CxtStr; - } - return OContextStr.str(); } void PseudoProbeCSProfileGenerator::generateProfile() { @@ -350,15 +350,15 @@ for (const auto &CI : BI.second) { const ProbeBasedCtxKey *CtxKey = dyn_cast(CI.first.getPtr()); - std::string PrefixContextId = - extractPrefixContextId(CtxKey->Probes, Binary); + SmallVector ContextStrStack; + extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary); // Fill in function body samples from probes, also infer caller's samples // from callee's probe - populateBodySamplesWithProbes(CI.second.RangeCounter, PrefixContextId, + populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack, Binary); // Fill in boundary samples for a call probe populateBoundarySamplesWithProbes(CI.second.BranchCounter, - PrefixContextId, Binary); + ContextStrStack, Binary); } } } @@ -403,8 +403,8 @@ } void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes( - const RangeSample &RangeCounter, StringRef PrefixContextId, - ProfiledBinary *Binary) { + const RangeSample &RangeCounter, + SmallVectorImpl &ContextStrStack, ProfiledBinary *Binary) { ProbeCounterMap ProbeCounter; // Extract the top frame probes by looking up each address among the range in // the Address2ProbeMap @@ -413,7 +413,7 @@ const PseudoProbe *Probe = PI.first; uint64_t Count = PI.second; FunctionSamples &FunctionProfile = - getFunctionProfileForLeafProbe(PrefixContextId, Probe, Binary); + getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary); FunctionProfile.addBodySamples(Probe->Index, 0, Count); FunctionProfile.addTotalSamples(Count); @@ -446,8 +446,8 @@ } void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes( - const BranchSample &BranchCounter, StringRef PrefixContextId, - ProfiledBinary *Binary) { + const BranchSample &BranchCounter, + SmallVectorImpl &ContextStrStack, ProfiledBinary *Binary) { for (auto BI : BranchCounter) { uint64_t SourceOffset = BI.first.first; uint64_t TargetOffset = BI.first.second; @@ -457,7 +457,7 @@ if (CallProbe == nullptr) continue; FunctionSamples &FunctionProfile = - getFunctionProfileForLeafProbe(PrefixContextId, CallProbe, Binary); + getFunctionProfileForLeafProbe(ContextStrStack, CallProbe, Binary); FunctionProfile.addBodySamples(CallProbe->Index, 0, Count); FunctionProfile.addTotalSamples(Count); StringRef CalleeName = FunctionSamples::getCanonicalFnName( @@ -470,25 +470,26 @@ } FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( - StringRef PrefixContextId, SmallVector &LeafInlinedContext, + SmallVectorImpl &ContextStrStack, const PseudoProbeFuncDesc *LeafFuncDesc) { - assert(LeafInlinedContext.size() && - "Profile context must have the leaf frame"); - std::ostringstream OContextStr; - OContextStr << PrefixContextId.str(); + assert(ContextStrStack.size() && "Profile context must have the leaf frame"); + // Compress the context string except for the leaf frame + std::string LeafFrame = ContextStrStack.back(); + ContextStrStack.pop_back(); + CSProfileGenerator::compressRecursionContext(ContextStrStack); - for (uint32_t I = 0; I < LeafInlinedContext.size() - 1; I++) { + std::ostringstream OContextStr; + for (uint32_t I = 0; I < ContextStrStack.size(); I++) { if (OContextStr.str().size()) OContextStr << " @ "; - OContextStr << LeafInlinedContext[I]; + OContextStr << ContextStrStack[I]; } // For leaf inlined context with the top frame, we should strip off the top // frame's probe id, like: // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" if (OContextStr.str().size()) OContextStr << " @ "; - StringRef LeafLoc = LeafInlinedContext.back(); - OContextStr << LeafLoc.split(":").first.str(); + OContextStr << StringRef(LeafFrame).split(":").first.str(); FunctionSamples &FunctionProile = getFunctionProfileForContext(OContextStr.str()); @@ -497,17 +498,18 @@ } FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe( - StringRef PrefixContextId, const PseudoProbe *LeafProbe, + SmallVectorImpl &ContextStrStack, const PseudoProbe *LeafProbe, ProfiledBinary *Binary) { - SmallVector LeafInlinedContext; - Binary->getInlineContextForProbe(LeafProbe, LeafInlinedContext); + // Explicitly copy the context for appending the leaf context + SmallVector ContextStrStackCopy(ContextStrStack.begin(), + ContextStrStack.end()); + Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy); // Note that the context from probe doesn't include leaf frame, // hence we need to retrieve and append the leaf frame. const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->GUID); - LeafInlinedContext.emplace_back(FuncDesc->FuncName + ":" + - Twine(LeafProbe->Index).str()); - return getFunctionProfileForLeafProbe(PrefixContextId, LeafInlinedContext, - FuncDesc); + ContextStrStackCopy.emplace_back(FuncDesc->FuncName + ":" + + Twine(LeafProbe->Index).str()); + return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc); } } // end namespace sampleprof diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -243,7 +243,7 @@ } void getInlineContextForProbe(const PseudoProbe *Probe, - SmallVector &InlineContextStack, + SmallVectorImpl &InlineContextStack, bool IncludeLeaf = false) const { return ProbeDecoder.getInlineContextForProbe(Probe, InlineContextStack, IncludeLeaf); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -8,6 +8,7 @@ #include "ProfiledBinary.h" #include "ErrorHandling.h" +#include "ProfileGenerator.h" #include "llvm/ADT/Triple.h" #include "llvm/Demangle/Demangle.h" #include "llvm/Support/CommandLine.h" @@ -128,7 +129,7 @@ std::string ProfiledBinary::getExpandedContextStr(const std::list &Stack) const { std::string ContextStr; - SmallVector ContextVec; + SmallVector ContextVec; // Process from frame root to leaf for (auto Iter = Stack.rbegin(); Iter != Stack.rend(); Iter++) { uint64_t Offset = virtualAddrToOffset(*Iter); @@ -139,21 +140,22 @@ } assert(ContextVec.size() && "Context length should be at least 1"); + // Compress the context string except for the leaf frame + std::string LeafFrame = ContextVec.back(); + ContextVec.pop_back(); + CSProfileGenerator::compressRecursionContext(ContextVec); std::ostringstream OContextStr; for (uint32_t I = 0; I < (uint32_t)ContextVec.size(); I++) { if (OContextStr.str().size()) { OContextStr << " @ "; } - - if (I == ContextVec.size() - 1) { - // Only keep the function name for the leaf frame - StringRef Ref(ContextVec[I]); - OContextStr << Ref.split(":").first.str(); - } else { - OContextStr << ContextVec[I]; - } + OContextStr << ContextVec[I]; } + // Only keep the function name for the leaf frame + if (OContextStr.str().size()) + OContextStr << " @ "; + OContextStr << StringRef(LeafFrame).split(":").first.str(); return OContextStr.str(); } diff --git a/llvm/tools/llvm-profgen/PseudoProbe.h b/llvm/tools/llvm-profgen/PseudoProbe.h --- a/llvm/tools/llvm-profgen/PseudoProbe.h +++ b/llvm/tools/llvm-profgen/PseudoProbe.h @@ -138,7 +138,7 @@ // Get the inlined context by traversing current inline tree backwards, // each tree node has its InlineSite which is taken as the context. // \p ContextStack is populated in root to leaf order - void getInlineContext(SmallVector &ContextStack, + void getInlineContext(SmallVectorImpl &ContextStack, const GUIDProbeFunctionMap &GUID2FuncMAP, bool ShowName) const; // Helper function to get the string from context stack @@ -214,7 +214,7 @@ // IncludeLeaf = false, Output: [main:1, foo:2] void getInlineContextForProbe(const PseudoProbe *Probe, - SmallVector &InlineContextStack, + SmallVectorImpl &InlineContextStack, bool IncludeLeaf) const; const AddressProbesMap &getAddress2ProbesMap() const { diff --git a/llvm/tools/llvm-profgen/PseudoProbe.cpp b/llvm/tools/llvm-profgen/PseudoProbe.cpp --- a/llvm/tools/llvm-profgen/PseudoProbe.cpp +++ b/llvm/tools/llvm-profgen/PseudoProbe.cpp @@ -34,7 +34,7 @@ OS << "Hash: " << FuncHash << "\n"; } -void PseudoProbe::getInlineContext(SmallVector &ContextStack, +void PseudoProbe::getInlineContext(SmallVectorImpl &ContextStack, const GUIDProbeFunctionMap &GUID2FuncMAP, bool ShowName) const { uint32_t Begin = ContextStack.size(); @@ -320,7 +320,7 @@ } void PseudoProbeDecoder::getInlineContextForProbe( - const PseudoProbe *Probe, SmallVector &InlineContextStack, + const PseudoProbe *Probe, SmallVectorImpl &InlineContextStack, bool IncludeLeaf) const { Probe->getInlineContext(InlineContextStack, GUID2FuncDescMap, true); if (!IncludeLeaf) diff --git a/llvm/unittests/tools/CMakeLists.txt b/llvm/unittests/tools/CMakeLists.txt --- a/llvm/unittests/tools/CMakeLists.txt +++ b/llvm/unittests/tools/CMakeLists.txt @@ -7,4 +7,4 @@ add_subdirectory( llvm-exegesis ) - +add_subdirectory(llvm-profgen) diff --git a/llvm/unittests/tools/llvm-profgen/CMakeLists.txt b/llvm/unittests/tools/llvm-profgen/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/llvm/unittests/tools/llvm-profgen/CMakeLists.txt @@ -0,0 +1,5 @@ +add_llvm_unittest(LLVMProfgenTests + ContextCompressionTest.cpp + ) + +target_link_libraries(LLVMProfgenTests PRIVATE LLVMTestingSupport) diff --git a/llvm/unittests/tools/llvm-profgen/ContextCompressionTest.cpp b/llvm/unittests/tools/llvm-profgen/ContextCompressionTest.cpp new file mode 100644 --- /dev/null +++ b/llvm/unittests/tools/llvm-profgen/ContextCompressionTest.cpp @@ -0,0 +1,36 @@ +//===-- ContextCompressionTest.cpp -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "../tools/llvm-profgen/ProfileGenerator.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace sampleprof; + +TEST(TestCompression, TestNoSizeLimit1) { + SmallVector Context = {"a", "b", "c", "a", "b", "c"}; + SmallVector Expect = {"a", "b", "c"}; + CSProfileGenerator::compressRecursionContext(Context, -1); + EXPECT_TRUE(std::equal(Context.begin(), Context.end(), Expect.begin())); +} + +TEST(TestCompression, TestNoSizeLimit2) { + SmallVector Context = {"m", "a", "a", "b", "c", "a", + "b", "c", "b", "c", "d"}; + SmallVector Expect = {"m", "a", "b", "c", "d"}; + CSProfileGenerator::compressRecursionContext(Context, -1); + EXPECT_TRUE(std::equal(Context.begin(), Context.end(), Expect.begin())); +} + +TEST(TestCompression, TestMaxDedupSize) { + SmallVector Context = {"m", "a", "a", "b", "c", "a", + "b", "c", "b", "c", "d"}; + SmallVector Expect = {"m", "a", "b", "c", + "a", "b", "c", "d"}; + CSProfileGenerator::compressRecursionContext(Context, 2); + EXPECT_TRUE(std::equal(Context.begin(), Context.end(), Expect.begin())); +}