diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h --- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h @@ -48,7 +48,7 @@ struct FlowBlock { uint64_t Index; uint64_t Weight{0}; - bool UnknownWeight{false}; + bool HasUnknownWeight{false}; uint64_t Flow{0}; bool HasSelfEdge{false}; std::vector SuccJumps; @@ -74,9 +74,45 @@ std::vector Blocks; std::vector Jumps; /// The index of the entry block. - uint64_t Entry; + uint64_t Entry{0}; }; +/// Various thresholds and options controlling the behavior of the profile +/// inference algorithm. Default values are tuned for several large-scale +/// applications, and can be modified via corresponding command-line flags. +struct ProfiParams { + /// Evenly distribute flow when there are multiple equally likely options. + bool EvenFlowDistribution{false}; + + /// Evenly re-distribute flow among unknown subgraphs. + bool RebalanceUnknown{false}; + + /// Join isolated components having positive flow. + bool JoinIslands{false}; + + /// The cost of increasing a block's count by one. + unsigned CostBlockInc{0}; + + /// The cost of decreasing a block's count by one. + unsigned CostBlockDec{0}; + + /// The cost of increasing a count of zero-weight block by one. + unsigned CostBlockZeroInc{0}; + + /// The cost of increasing the entry block's count by one. + unsigned CostBlockEntryInc{0}; + + /// The cost of decreasing the entry block's count by one. + unsigned CostBlockEntryDec{0}; + + /// The cost of increasing an unknown block's count by one. + unsigned CostBlockUnknownInc{0}; + + /// The cost of taking an unlikely block/jump. + const int64_t CostUnlikely = ((int64_t)1) << 30; +}; + +void applyFlowInference(const ProfiParams &Params, FlowFunction &Func); void applyFlowInference(FlowFunction &Func); /// Sample profile inference pass. @@ -171,10 +207,10 @@ for (const auto *BB : BasicBlocks) { FlowBlock Block; if (SampleBlockWeights.find(BB) != SampleBlockWeights.end()) { - Block.UnknownWeight = false; + Block.HasUnknownWeight = false; Block.Weight = SampleBlockWeights[BB]; } else { - Block.UnknownWeight = true; + Block.HasUnknownWeight = true; Block.Weight = 0; } Block.Index = Func.Blocks.size(); diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h --- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -76,7 +76,6 @@ } // end namespace afdo_detail extern cl::opt SampleProfileUseProfi; -extern cl::opt SampleProfileInferEntryCount; template class SampleProfileLoaderBaseImpl { public: @@ -922,8 +921,7 @@ if (SampleProfileUseProfi) { const BasicBlockT *EntryBB = getEntryBB(&F); ErrorOr EntryWeight = getBlockWeight(EntryBB); - if (BlockWeights[EntryBB] > 0 && - (SampleProfileInferEntryCount || !EntryWeight)) { + if (BlockWeights[EntryBB] > 0) { getFunction(F).setEntryCount( ProfileCount(BlockWeights[EntryBB], Function::PCT_Real), &InlinedGUIDs); diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp --- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp +++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp @@ -26,34 +26,42 @@ namespace { -static cl::opt SampleProfileEvenCountDistribution( - "sample-profile-even-count-distribution", cl::init(true), cl::Hidden, - cl::desc("Try to evenly distribute counts when there are multiple equally " +static cl::opt SampleProfileEvenFlowDistribution( + "sample-profile-even-flow-distribution", cl::init(true), cl::Hidden, + cl::desc("Try to evenly distribute flow when there are multiple equally " "likely options.")); -static cl::opt SampleProfileMaxDfsCalls( - "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden, - cl::desc("Maximum number of dfs iterations for even count distribution.")); +static cl::opt SampleProfileRebalanceUnknown( + "sample-profile-rebalance-unknown", cl::init(true), cl::Hidden, + cl::desc("Evenly re-distribute flow among unknown subgraphs.")); -static cl::opt SampleProfileProfiCostInc( - "sample-profile-profi-cost-inc", cl::init(10), cl::Hidden, - cl::desc("A cost of increasing a block's count by one.")); +static cl::opt SampleProfileJoinIslands( + "sample-profile-join-islands", cl::init(true), cl::Hidden, + cl::desc("Join isolated components having positive flow.")); -static cl::opt SampleProfileProfiCostDec( - "sample-profile-profi-cost-dec", cl::init(20), cl::Hidden, - cl::desc("A cost of decreasing a block's count by one.")); +static cl::opt SampleProfileProfiCostBlockInc( + "sample-profile-profi-cost-block-inc", cl::init(10), cl::Hidden, + cl::desc("The cost of increasing a block's count by one.")); -static cl::opt SampleProfileProfiCostIncZero( - "sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden, - cl::desc("A cost of increasing a count of zero-weight block by one.")); +static cl::opt SampleProfileProfiCostBlockDec( + "sample-profile-profi-cost-block-dec", cl::init(20), cl::Hidden, + cl::desc("The cost of decreasing a block's count by one.")); -static cl::opt SampleProfileProfiCostIncEntry( - "sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden, - cl::desc("A cost of increasing the entry block's count by one.")); +static cl::opt SampleProfileProfiCostBlockEntryInc( + "sample-profile-profi-cost-block-entry-inc", cl::init(40), cl::Hidden, + cl::desc("The cost of increasing the entry block's count by one.")); -static cl::opt SampleProfileProfiCostDecEntry( - "sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden, - cl::desc("A cost of decreasing the entry block's count by one.")); +static cl::opt SampleProfileProfiCostBlockEntryDec( + "sample-profile-profi-cost-block-entry-dec", cl::init(10), cl::Hidden, + cl::desc("The cost of decreasing the entry block's count by one.")); + +static cl::opt SampleProfileProfiCostBlockZeroInc( + "sample-profile-profi-cost-block-zero-inc", cl::init(11), cl::Hidden, + cl::desc("The cost of increasing a count of zero-weight block by one.")); + +static cl::opt SampleProfileProfiCostBlockUnknownInc( + "sample-profile-profi-cost-block-unknown-inc", cl::init(0), cl::Hidden, + cl::desc("The cost of increasing an unknown block's count by one.")); /// A value indicating an infinite flow/capacity/weight of a block/edge. /// Not using numeric_limits::max(), as the values can be summed up @@ -76,6 +84,8 @@ /// minimum total cost respecting the given edge capacities. class MinCostMaxFlow { public: + MinCostMaxFlow(const ProfiParams &Params) : Params(Params) {} + // Initialize algorithm's data structures for a network of a given size. void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) { Source = SourceNode; @@ -83,7 +93,7 @@ Nodes = std::vector(NodeCount); Edges = std::vector>(NodeCount, std::vector()); - if (SampleProfileEvenCountDistribution) + if (Params.EvenFlowDistribution) AugmentingEdges = std::vector>(NodeCount, std::vector()); } @@ -166,11 +176,6 @@ return Flow; } - /// A cost of taking an unlikely jump. - static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30; - /// Minimum BaseDistance for the jump distance values in island joining. - static constexpr uint64_t MinBaseDistance = 10000; - private: /// Iteratively find an augmentation path/dag in the network and send the /// flow along its edges. The method returns the number of applied iterations. @@ -180,7 +185,7 @@ uint64_t PathCapacity = computeAugmentingPathCapacity(); while (PathCapacity > 0) { bool Progress = false; - if (SampleProfileEvenCountDistribution) { + if (Params.EvenFlowDistribution) { // Identify node/edge candidates for augmentation identifyShortestEdges(PathCapacity); @@ -253,7 +258,7 @@ // from Source to Target; it follows from inequalities // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target] // >= Dist[Source, V] - if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0) + if (!Params.EvenFlowDistribution && Nodes[Target].Distance == 0) break; if (Nodes[Src].Distance > Nodes[Target].Distance) continue; @@ -342,7 +347,7 @@ if (Edge.OnShortestPath) { // If we haven't seen Edge.Dst so far, continue DFS search there - if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) { + if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) { Dst.Discovery = ++Time; Stack.emplace(Edge.Dst, 0); Dst.NumCalls++; @@ -512,6 +517,9 @@ } } + /// Maximum number of DFS iterations for DAG finding. + static constexpr uint64_t MaxDfsCalls = 10; + /// A node in a flow network. struct Node { /// The cost of the cheapest path from the source to the current node. @@ -566,6 +574,8 @@ uint64_t Target; /// Augmenting edges. std::vector> AugmentingEdges; + /// Params for flow computation. + const ProfiParams &Params; }; /// A post-processing adjustment of control flow. It applies two steps by @@ -586,18 +596,23 @@ /// class FlowAdjuster { public: - FlowAdjuster(FlowFunction &Func) : Func(Func) { + FlowAdjuster(const ProfiParams &Params, FlowFunction &Func) + : Params(Params), Func(Func) { assert(Func.Blocks[Func.Entry].isEntry() && "incorrect index of the entry block"); } // Run the post-processing void run() { - /// Adjust the flow to get rid of isolated components. - joinIsolatedComponents(); + if (Params.JoinIslands) { + /// Adjust the flow to get rid of isolated components. + joinIsolatedComponents(); + } - /// Rebalance the flow inside unknown subgraphs. - rebalanceUnknownSubgraphs(); + if (Params.RebalanceUnknown) { + /// Rebalance the flow inside unknown subgraphs. + rebalanceUnknownSubgraphs(); + } } private: @@ -736,12 +751,13 @@ /// To capture this objective with integer distances, we round off fractional /// parts to a multiple of 1 / BaseDistance. int64_t jumpDistance(FlowJump *Jump) const { + if (Jump->IsUnlikely) + return Params.CostUnlikely; + uint64_t BaseDistance = - std::max(MinCostMaxFlow::MinBaseDistance, + std::max(FlowAdjuster::MinBaseDistance, std::min(Func.Blocks[Func.Entry].Flow, - MinCostMaxFlow::AuxCostUnlikely / NumBlocks())); - if (Jump->IsUnlikely) - return MinCostMaxFlow::AuxCostUnlikely; + Params.CostUnlikely / NumBlocks())); if (Jump->Flow > 0) return BaseDistance + BaseDistance / Jump->Flow; return BaseDistance * NumBlocks(); @@ -786,13 +802,13 @@ bool canRebalanceAtRoot(const FlowBlock *SrcBlock) { // Do not attempt to find unknown subgraphs from an unknown or a // zero-flow block - if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0) + if (SrcBlock->HasUnknownWeight || SrcBlock->Flow == 0) return false; // Do not attempt to process subgraphs from a block w/o unknown sucessors bool HasUnknownSuccs = false; for (auto *Jump : SrcBlock->SuccJumps) { - if (Func.Blocks[Jump->Target].UnknownWeight) { + if (Func.Blocks[Jump->Target].HasUnknownWeight) { HasUnknownSuccs = true; break; } @@ -830,7 +846,7 @@ continue; // Process block Dst Visited[Dst] = true; - if (!Func.Blocks[Dst].UnknownWeight) { + if (!Func.Blocks[Dst].HasUnknownWeight) { KnownDstBlocks.push_back(&Func.Blocks[Dst]); } else { Queue.push(Dst); @@ -893,11 +909,11 @@ return false; // Ignore jumps out of SrcBlock to known blocks - if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock) + if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock) return true; // Ignore jumps to known blocks with zero flow - if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0) + if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0) return true; return false; @@ -935,7 +951,7 @@ break; // Keep an acyclic order of unknown blocks - if (Block->UnknownWeight && Block != SrcBlock) + if (Block->HasUnknownWeight && Block != SrcBlock) AcyclicOrder.push_back(Block); // Add to the queue all successors with zero local in-degree @@ -977,7 +993,7 @@ // Ditribute flow from the remaining blocks for (auto *Block : UnknownBlocks) { - assert(Block->UnknownWeight && "incorrect unknown subgraph"); + assert(Block->HasUnknownWeight && "incorrect unknown subgraph"); uint64_t BlockFlow = 0; // Block's flow is the sum of incoming flows for (auto *Jump : Block->PredJumps) { @@ -1019,7 +1035,11 @@ /// A constant indicating an arbitrary exit block of a function. static constexpr uint64_t AnyExitBlock = uint64_t(-1); + /// Minimum BaseDistance for the jump distance values in island joining. + static constexpr uint64_t MinBaseDistance = 10000; + /// Params for flow computation. + const ProfiParams &Params; /// The function. FlowFunction &Func; }; @@ -1029,7 +1049,8 @@ /// Every block is split into three nodes that are responsible for (i) an /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or /// reduction of the block weight. -void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) { +void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network, + FlowFunction &Func) { uint64_t NumBlocks = Func.Blocks.size(); assert(NumBlocks > 1 && "Too few blocks in a function"); LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n"); @@ -1051,7 +1072,7 @@ // Create three nodes for every block of the function for (uint64_t B = 0; B < NumBlocks; B++) { auto &Block = Func.Blocks[B]; - assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) && + assert((!Block.HasUnknownWeight || Block.Weight == 0 || Block.isEntry()) && "non-zero weight of a block w/o weight except for an entry"); // Split every block into two nodes @@ -1076,22 +1097,22 @@ // We assume that decreasing block counts is more expensive than increasing, // and thus, setting separate costs here. In the future we may want to tune // the relative costs so as to maximize the quality of generated profiles. - int64_t AuxCostInc = SampleProfileProfiCostInc; - int64_t AuxCostDec = SampleProfileProfiCostDec; - if (Block.UnknownWeight) { + int64_t AuxCostInc = Params.CostBlockInc; + int64_t AuxCostDec = Params.CostBlockDec; + if (Block.HasUnknownWeight) { // Do not penalize changing weights of blocks w/o known profile count - AuxCostInc = 0; + AuxCostInc = Params.CostBlockUnknownInc; AuxCostDec = 0; } else { // Increasing the count for "cold" blocks with zero initial count is more // expensive than for "hot" ones if (Block.Weight == 0) { - AuxCostInc = SampleProfileProfiCostIncZero; + AuxCostInc = Params.CostBlockZeroInc; } // Modifying the count of the entry block is expensive if (Block.isEntry()) { - AuxCostInc = SampleProfileProfiCostIncEntry; - AuxCostDec = SampleProfileProfiCostDecEntry; + AuxCostInc = Params.CostBlockEntryInc; + AuxCostDec = Params.CostBlockEntryDec; } } // For blocks with self-edges, do not penalize a reduction of the count, @@ -1115,7 +1136,7 @@ if (Src != Dst) { uint64_t SrcOut = 3 * Src + 1; uint64_t DstIn = 3 * Dst; - uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0; + uint64_t Cost = Jump.IsUnlikely ? Params.CostUnlikely : 0; Network.addEdge(SrcOut, DstIn, Cost); } } @@ -1232,17 +1253,17 @@ } // end of anonymous namespace /// Apply the profile inference algorithm for a given flow function -void llvm::applyFlowInference(FlowFunction &Func) { +void llvm::applyFlowInference(const ProfiParams &Params, FlowFunction &Func) { // Create and apply an inference network model - auto InferenceNetwork = MinCostMaxFlow(); - initializeNetwork(InferenceNetwork, Func); + auto InferenceNetwork = MinCostMaxFlow(Params); + initializeNetwork(Params, InferenceNetwork, Func); InferenceNetwork.run(); // Extract flow values for every block and every edge extractWeights(InferenceNetwork, Func); // Post-processing adjustments to the flow - auto Adjuster = FlowAdjuster(Func); + auto Adjuster = FlowAdjuster(Params, Func); Adjuster.run(); #ifndef NDEBUG @@ -1250,3 +1271,20 @@ verifyWeights(Func); #endif } + +/// Apply the profile inference algorithm for a given flow function +void llvm::applyFlowInference(FlowFunction &Func) { + ProfiParams Params; + // Set the params from the command-line flags. + Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution; + Params.RebalanceUnknown = SampleProfileRebalanceUnknown; + Params.JoinIslands = SampleProfileJoinIslands; + Params.CostBlockInc = SampleProfileProfiCostBlockInc; + Params.CostBlockDec = SampleProfileProfiCostBlockDec; + Params.CostBlockEntryInc = SampleProfileProfiCostBlockEntryInc; + Params.CostBlockEntryDec = SampleProfileProfiCostBlockEntryDec; + Params.CostBlockZeroInc = SampleProfileProfiCostBlockZeroInc; + Params.CostBlockUnknownInc = SampleProfileProfiCostBlockUnknownInc; + + applyFlowInference(Params, Func); +} diff --git a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp --- a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp +++ b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp @@ -42,10 +42,6 @@ "sample-profile-use-profi", cl::Hidden, cl::desc("Use profi to infer block and edge counts.")); -cl::opt SampleProfileInferEntryCount( - "sample-profile-infer-entry-count", cl::init(true), cl::Hidden, - cl::desc("Use profi to infer function entry count.")); - namespace sampleprofutil { /// Return true if the given callsite is hot wrt to hot cutoff threshold. diff --git a/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll b/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll @@ -1,19 +1,19 @@ ; Make sure Import GUID list for ThinLTO properly set for CSSPGO -; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof -sample-profile-even-count-distribution=0 -S | FileCheck %s +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof --sample-profile-even-flow-distribution=0 -S | FileCheck %s ; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/csspgo-import-list.prof -o %t.prof -; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%t.prof -sample-profile-even-count-distribution=0 -S | FileCheck %s +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%t.prof --sample-profile-even-flow-distribution=0 -S | FileCheck %s ; RUN: llvm-profdata show --sample -show-sec-info-only %t.prof | FileCheck %s --check-prefix=CHECK-ORDERED ; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/csspgo-import-list.prof -o %t.md5 -; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%t.md5 -sample-profile-even-count-distribution=0 -S | FileCheck %s +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%t.md5 --sample-profile-even-flow-distribution=0 -S | FileCheck %s ; RUN: llvm-profdata show --sample -show-sec-info-only %t.md5 | FileCheck %s --check-prefix=CHECK-ORDERED ;; Validate that with replay in effect, we import call sites even if they are below the threshold ;; Baseline import decisions -; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof -profile-summary-hot-count=10000 -sample-profile-even-count-distribution=0 -S | FileCheck %s --check-prefix=THRESHOLD +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof -profile-summary-hot-count=10000 --sample-profile-even-flow-distribution=0 -S | FileCheck %s --check-prefix=THRESHOLD ;; With replay -; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof -sample-profile-inline-replay=%S/Inputs/csspgo-import-list-replay.txt -sample-profile-inline-replay-scope=Module -profile-summary-hot-count=10000 -sample-profile-even-count-distribution=0 -S | FileCheck %s --check-prefix=THRESHOLD-REPLAY +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof -sample-profile-inline-replay=%S/Inputs/csspgo-import-list-replay.txt -sample-profile-inline-replay-scope=Module -profile-summary-hot-count=10000 --sample-profile-even-flow-distribution=0 -S | FileCheck %s --check-prefix=THRESHOLD-REPLAY ;; With replay but no profile information for call to _Z5funcAi. We import _Z5funcAi because it's explicitly in the replay but don't go further to its callee (_Z3fibi) because we lack samples -; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list-no-funca.prof -sample-profile-inline-replay=%S/Inputs/csspgo-import-list-replay.txt -sample-profile-inline-replay-scope=Module -profile-summary-hot-count=10000 -sample-profile-even-count-distribution=0 -S | FileCheck %s --check-prefix=THRESHOLD-REPLAY-NO-FUNCA +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list-no-funca.prof -sample-profile-inline-replay=%S/Inputs/csspgo-import-list-replay.txt -sample-profile-inline-replay-scope=Module -profile-summary-hot-count=10000 --sample-profile-even-flow-distribution=0 -S | FileCheck %s --check-prefix=THRESHOLD-REPLAY-NO-FUNCA declare i32 @_Z5funcBi(i32 %x) declare i32 @_Z5funcAi(i32 %x) diff --git a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll --- a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll +++ b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll @@ -8,10 +8,10 @@ ; main:3 @ _Z5funcAi ; main:3 @ _Z5funcAi:1 @ _Z8funcLeafi ; _Z5funcBi:1 @ _Z8funcLeafi -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -sample-profile-even-count-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -sample-profile-even-count-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -sample-profile-even-count-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -sample-profile-even-count-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -sample-profile-even-flow-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -sample-profile-even-flow-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -sample-profile-even-flow-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -sample-profile-even-flow-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL ; ; Test we inlined the following in top-down order and entry counts accurate reflects post-inline base profile ; _Z5funcAi:1 @ _Z8funcLeafi