diff --git a/llvm/test/tools/llvm-profgen/merge-cold-profile.test b/llvm/test/tools/llvm-profgen/merge-cold-profile.test --- a/llvm/test/tools/llvm-profgen/merge-cold-profile.test +++ b/llvm/test/tools/llvm-profgen/merge-cold-profile.test @@ -11,7 +11,7 @@ ; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-UNMERGED ; Test --csprof-frame-depth-for-cold-context -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-cold-count=100 --csprof-trim-cold-context=0 --csprof-frame-depth-for-cold-context=2 +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-cold-count=100 --csprof-trim-cold-context=0 --csprof-max-cold-context-depth=2 ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-COLD-CONTEXT-LENGTH ; CHECK: [fa]:14:4 diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test --- a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test +++ b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test @@ -3,6 +3,8 @@ ; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --profile-summary-cold-count=0 ; RUN: FileCheck %s --input-file %t +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-cold-count=0 --csprof-max-context-depth=2 +; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-MAX-CTX-DEPTH ; 
CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb]:48:0 ; CHECK-UNCOMPRESS: 1: 11 @@ -21,6 +23,20 @@ ; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:2:0 ; CHECK-UNCOMPRESS: 2: 1 fa:1 +; CHECK-MAX-CTX-DEPTH:[foo:3 @ fa:2 @ fb]:47:0 +; CHECK-MAX-CTX-DEPTH: 1: 11 +; CHECK-MAX-CTX-DEPTH:[main:1 @ foo:3 @ fa]:13:0 +; CHECK-MAX-CTX-DEPTH: 1: 1 +; CHECK-MAX-CTX-DEPTH: 2: 2 +; CHECK-MAX-CTX-DEPTH:[fa:2 @ fb:2 @ fa]:8:0 +; CHECK-MAX-CTX-DEPTH: 1: 1 +; CHECK-MAX-CTX-DEPTH: 2: 1 +; CHECK-MAX-CTX-DEPTH: 4: 1 +; CHECK-MAX-CTX-DEPTH:[main:1 @ foo]:7:0 +; CHECK-MAX-CTX-DEPTH: 2: 1 +; CHECK-MAX-CTX-DEPTH: 3: 2 fa:1 +; CHECK-MAX-CTX-DEPTH:[fb:2 @ fa:2 @ fb]:1:0 + ; CHECK: [main:1 @ foo:3 @ fa:2 @ fb]:48:0 ; CHECK: 1: 11 diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test --- a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test +++ b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test @@ -5,7 +5,8 @@ ; RUN: FileCheck %s --input-file %t ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe-nommap.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --show-unwinder-output --profile-summary-cold-count=0 | FileCheck %s --check-prefix=CHECK-UNWINDER ; RUN: FileCheck %s --input-file %t - +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --profile-summary-cold-count=0 --csprof-max-context-depth=0 +; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-MAX-CTX-DEPTH ; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:4:1 ; CHECK-UNCOMPRESS: 1: 1 @@ -64,6 +65,25 @@ ; CHECK-UNCOMPRESS: !CFGChecksum: 563022570642068 +; CHECK-MAX-CTX-DEPTH: [fb]:19:6 +; 
CHECK-MAX-CTX-DEPTH: 1: 6 +; CHECK-MAX-CTX-DEPTH: 2: 3 +; CHECK-MAX-CTX-DEPTH: 3: 3 +; CHECK-MAX-CTX-DEPTH: 4: 0 +; CHECK-MAX-CTX-DEPTH: 5: 4 fb:4 +; CHECK-MAX-CTX-DEPTH: 6: 3 fa:3 +; CHECK-MAX-CTX-DEPTH: !CFGChecksum: 563022570642068 +; CHECK-MAX-CTX-DEPTH: [fa]:14:4 +; CHECK-MAX-CTX-DEPTH: 1: 4 +; CHECK-MAX-CTX-DEPTH: 3: 4 +; CHECK-MAX-CTX-DEPTH: 4: 2 +; CHECK-MAX-CTX-DEPTH: 5: 1 +; CHECK-MAX-CTX-DEPTH: 6: 0 +; CHECK-MAX-CTX-DEPTH: 7: 2 fb:2 +; CHECK-MAX-CTX-DEPTH: 8: 1 fa:1 +; CHECK-MAX-CTX-DEPTH: !CFGChecksum: 563070469352221 + + ; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb]:13:4 ; CHECK: 1: 4 ; CHECK: 2: 3 diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -109,6 +109,9 @@ } CSProfileGenerator::compressRecursionContext( ProbeBasedKey->Probes); + CSProfileGenerator::trimContext( + ProbeBasedKey->Probes); + ProbeBasedKey->genHashCode(); return ProbeBasedKey; } diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -70,6 +70,16 @@ public: void generateProfile() override; + // Keep only the last Depth frames of the context stack (< 0: no trim). + template + static void trimContext(SmallVectorImpl &S, int Depth = MaxContextDepth) { + if (Depth < 0 || static_cast(Depth) >= S.size()) + return; + std::copy(S.begin() + S.size() - static_cast(Depth), S.end(), + S.begin()); + S.resize(Depth); + } + // Remove adjacent repeated context sequences up to a given sequence length, // -1 means no size limit. Note that repeated sequences are identified based // on the exact call site, this is finer granularity than function recursion. @@ -212,6 +222,7 @@ // Deduplicate adjacent repeated context sequences up to a given sequence // length. -1 means no size limit. 
static int32_t MaxCompressionSize; + static int MaxContextDepth; }; using ProbeCounterMap = diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -45,10 +45,15 @@ "is still smaller than threshold, it will be trimmed.")); static cl::opt CSProfColdContextFrameDepth( - "csprof-frame-depth-for-cold-context", cl::init(1), cl::ZeroOrMore, + "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore, cl::desc("Keep the last K frames while merging cold profile. 1 means the " "context-less base profile")); +static cl::opt CSProfMaxContextDepth( + "csprof-max-context-depth", cl::ZeroOrMore, + cl::desc("Trim context stack at a given depth. -1 means no trim."), + cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth)); + static cl::opt EnableCSPreInliner( "csspgo-preinliner", cl::Hidden, cl::init(false), cl::desc("Run a global pre-inliner to merge context profile based on " @@ -65,6 +70,8 @@ // Initialize the MaxCompressionSize to -1 which means no size limit int32_t CSProfileGenerator::MaxCompressionSize = -1; +int CSProfileGenerator::MaxContextDepth = -1; + static bool usePseudoProbes(const BinarySampleCounterMap &BinarySampleCounters) { return BinarySampleCounters.size() && @@ -608,6 +615,7 @@ std::string LeafFrame = ContextStrStack.back(); ContextStrStack.pop_back(); CSProfileGenerator::compressRecursionContext(ContextStrStack); + CSProfileGenerator::trimContext(ContextStrStack); std::ostringstream OContextStr; for (uint32_t I = 0; I < ContextStrStack.size(); I++) { diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -125,6 +125,7 @@ std::string LeafFrame = ContextVec.back(); ContextVec.pop_back(); CSProfileGenerator::compressRecursionContext(ContextVec); + 
CSProfileGenerator::trimContext(ContextVec); std::ostringstream OContextStr; for (uint32_t I = 0; I < (uint32_t)ContextVec.size(); I++) {