// Patch summary: introduce a cap on context-stack depth in llvm-profgen.
//
// * ProfileGenerator.cpp defines the command-line option
//   "csprof-ctx-stack-cap" (CSProfCtxStackCap, initial value 20). Its help
//   text says a value of -1 means no cap.
// * ProfileGenerator.h adds the static helper
//   CSProfileGenerator::capContextStack(S, Depth):
//     - returns without touching S when Depth is negative or when Depth is
//       greater than or equal to S.size();
//     - otherwise copies the last Depth elements of S to the front and
//       resizes S to Depth, so only the trailing Depth entries are kept.
//   Note that every negative Depth disables the cap, not only -1.
// * The helper is invoked immediately after compressRecursionContext at three
//   call sites: PerfReader.cpp (on ProbeBasedKey->Probes, before
//   genHashCode()), ProfileGenerator.cpp (on ContextStrStack) and
//   ProfiledBinary.cpp (on ContextVec). In the latter two the leaf frame has
//   already been popped off the vector, so the cap is applied to the remaining
//   frames only.
// * PerfReader.cpp and ProfiledBinary.cpp reach the option through an
//   `extern` declaration of CSProfCtxStackCap.
// * Tests: recursion-compression-noprobe.test gains a RUN line with
//   --compress-recursion=0 --csprof-ctx-stack-cap=2, and
//   recursion-compression-pseudoprobe.test gains one with
//   --compress-recursion=0 --csprof-ctx-stack-cap=0; both are verified with
//   the new CHECK-STACK-CAP prefix.
//
// NOTE(review): in this copy of the patch the original line breaks are
// collapsed, and template argument lists appear to be missing (after
// `cl::opt`, `template`, `static_cast` and `SmallVectorImpl`). This is
// presumably an extraction artifact -- confirm against the original patch
// before applying it.
diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test --- a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test +++ b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test @@ -3,6 +3,8 @@ ; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --profile-summary-cold-count=0 ; RUN: FileCheck %s --input-file %t +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-cold-count=0 --csprof-ctx-stack-cap=2 +; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-STACK-CAP ; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb]:48:0 ; CHECK-UNCOMPRESS: 1: 11 @@ -21,6 +23,20 @@ ; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:2:0 ; CHECK-UNCOMPRESS: 2: 1 fa:1 +; CHECK-STACK-CAP:[foo:3 @ fa:2 @ fb]:47:0 +; CHECK-STACK-CAP: 1: 11 +; CHECK-STACK-CAP:[main:1 @ foo:3 @ fa]:13:0 +; CHECK-STACK-CAP: 1: 1 +; CHECK-STACK-CAP: 2: 2 +; CHECK-STACK-CAP:[fa:2 @ fb:2 @ fa]:8:0 +; CHECK-STACK-CAP: 1: 1 +; CHECK-STACK-CAP: 2: 1 +; CHECK-STACK-CAP: 4: 1 +; CHECK-STACK-CAP:[main:1 @ foo]:7:0 +; CHECK-STACK-CAP: 2: 1 +; CHECK-STACK-CAP: 3: 2 fa:1 +; CHECK-STACK-CAP:[fb:2 @ fa:2 @ fb]:1:0 + ; CHECK: [main:1 @ foo:3 @ fa:2 @ fb]:48:0 ; CHECK: 1: 11 diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test --- a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test +++ b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test @@ -5,7 +5,8 @@ ; RUN: FileCheck %s --input-file %t ; RUN: llvm-profgen --format=text 
--perfscript=%S/Inputs/recursion-compression-pseudoprobe-nommap.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 | FileCheck %s --check-prefix=CHECK-UNWINDER ; RUN: FileCheck %s --input-file %t - +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-cold-count=0 --csprof-ctx-stack-cap=0 +; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-STACK-CAP ; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:4:1 ; CHECK-UNCOMPRESS: 1: 1 @@ -64,6 +65,25 @@ ; CHECK-UNCOMPRESS: !CFGChecksum: 563022570642068 +; CHECK-STACK-CAP: [fb]:19:6 +; CHECK-STACK-CAP: 1: 6 +; CHECK-STACK-CAP: 2: 3 +; CHECK-STACK-CAP: 3: 3 +; CHECK-STACK-CAP: 4: 0 +; CHECK-STACK-CAP: 5: 4 fb:4 +; CHECK-STACK-CAP: 6: 3 fa:3 +; CHECK-STACK-CAP: !CFGChecksum: 563022570642068 +; CHECK-STACK-CAP: [fa]:14:4 +; CHECK-STACK-CAP: 1: 4 +; CHECK-STACK-CAP: 3: 4 +; CHECK-STACK-CAP: 4: 2 +; CHECK-STACK-CAP: 5: 1 +; CHECK-STACK-CAP: 6: 0 +; CHECK-STACK-CAP: 7: 2 fb:2 +; CHECK-STACK-CAP: 8: 1 fa:1 +; CHECK-STACK-CAP: !CFGChecksum: 563070469352221 + + ; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb]:13:4 ; CHECK: 1: 4 ; CHECK: 2: 3 diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -20,6 +20,7 @@ extern cl::opt ShowDisassemblyOnly; extern cl::opt ShowSourceLocations; +extern cl::opt CSProfCtxStackCap; namespace llvm { namespace sampleprof { @@ -109,6 +110,9 @@ } CSProfileGenerator::compressRecursionContext( ProbeBasedKey->Probes); + CSProfileGenerator::capContextStack( + ProbeBasedKey->Probes, CSProfCtxStackCap); + ProbeBasedKey->genHashCode(); 
return ProbeBasedKey; } diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -70,6 +70,16 @@ public: void generateProfile() override; + // Cap the context stack by cutting off from the bottom at a given depth. + template + static void capContextStack(SmallVectorImpl &S, int Depth) { + if (Depth < 0 || static_cast(Depth) >= S.size()) + return; + std::copy(S.begin() + S.size() - static_cast(Depth), S.end(), + S.begin()); + S.resize(Depth); + } + // Remove adjacent repeated context sequences up to a given sequence length, // -1 means no size limit. Note that repeated sequences are identified based // on the exact call site, this is finer granularity than function recursion. diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -49,6 +49,10 @@ cl::desc("Keep the last K frames while merging cold profile. 1 means the " "context-less base profile")); +cl::opt CSProfCtxStackCap( + "csprof-ctx-stack-cap", cl::init(20), cl::ZeroOrMore, + cl::desc("Cap context stack at a given depth. 
No cap if the input is -1.")); + static cl::opt EnableCSPreInliner( "csspgo-preinliner", cl::Hidden, cl::init(false), cl::desc("Run a global pre-inliner to merge context profile based on " @@ -608,6 +612,7 @@ std::string LeafFrame = ContextStrStack.back(); ContextStrStack.pop_back(); CSProfileGenerator::compressRecursionContext(ContextStrStack); + CSProfileGenerator::capContextStack(ContextStrStack, CSProfCtxStackCap); std::ostringstream OContextStr; for (uint32_t I = 0; I < ContextStrStack.size(); I++) { diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -38,6 +38,8 @@ "show-pseudo-probe", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Print pseudo probe section and disassembled info.")); +extern cl::opt CSProfCtxStackCap; + namespace llvm { namespace sampleprof { @@ -125,6 +127,8 @@ std::string LeafFrame = ContextVec.back(); ContextVec.pop_back(); CSProfileGenerator::compressRecursionContext(ContextVec); + CSProfileGenerator::capContextStack(ContextVec, + CSProfCtxStackCap); std::ostringstream OContextStr; for (uint32_t I = 0; I < (uint32_t)ContextVec.size(); I++) {