diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp --- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp +++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp @@ -144,7 +144,7 @@ /// A cost of decreasing the entry block's count by one. static constexpr int64_t AuxCostDecEntry = 10; /// A cost of taking an unlikely jump. - static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20; + static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30; private: /// Check for existence of an augmenting path with a positive capacity. @@ -236,7 +236,7 @@ } } - /// An node in a flow network. + /// A node in a flow network. struct Node { /// The cost of the cheapest path from the source to the current node. int64_t Distance; @@ -303,9 +303,6 @@ rebalanceUnknownSubgraphs(); } - /// The probability for the first successor of a unknown subgraph - static constexpr double UnknownFirstSuccProbability = 0.5; - private: void joinIsolatedComponents() { // Find blocks that are reachable from the source @@ -452,45 +449,70 @@ uint64_t NumBlocks() const { return Func.Blocks.size(); } - /// Rebalance unknown subgraphs so as each branch splits with probabilities - /// UnknownFirstSuccProbability and 1 - UnknownFirstSuccProbability + /// Rebalance unknown subgraphs so that the flow is split evenly across the + /// outgoing branches of every block of the subgraph. The method iterates over + /// blocks with known weight and identifies unknown subgraphs rooted at the + /// blocks. Then it verifies if flow rebalancing is feasible and applies it. void rebalanceUnknownSubgraphs() { - static_assert( - UnknownFirstSuccProbability >= 0.0 && - UnknownFirstSuccProbability <= 1.0, - "the share of the unknown successor should be between 0 and 1"); - // Try to find unknown subgraphs from each non-unknown block + // Try to find unknown subgraphs from each block for (uint64_t I = 0; I < Func.Blocks.size(); I++) { auto SrcBlock = &Func.Blocks[I]; - // Do not attempt to find unknown successors from a unknown or a - // zero-flow block - if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0) + // Verify if rebalancing rooted at SrcBlock is feasible + if (!canRebalanceAtRoot(SrcBlock)) continue; - std::vector UnknownSuccs; + // Find an unknown subgraphs starting at SrcBlock. Along the way, + // fill in known destinations and intermediate unknown blocks. + std::vector UnknownBlocks; + std::vector KnownDstBlocks; + findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks); + + // Verify if rebalancing of the subgraph is feasible. If the search is + // successful, find the unique destination block (which can be null) FlowBlock *DstBlock = nullptr; - // Find a unknown subgraphs starting at block SrcBlock - if (!findUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs)) + if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks, + DstBlock)) continue; - // At the moment, we do not rebalance subgraphs containing cycles among - // unknown blocks - if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownSuccs)) + + // We cannot rebalance subgraphs containing cycles among unknown blocks + if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks)) continue; // Rebalance the flow - rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs); + rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks); } } - /// Find a unknown subgraph starting at block SrcBlock. - /// If the search is successful, the method sets DstBlock and UnknownSuccs. - bool findUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *&DstBlock, - std::vector &UnknownSuccs) { + /// Verify if rebalancing rooted at a given block is possible. + bool canRebalanceAtRoot(const FlowBlock *SrcBlock) { + // Do not attempt to find unknown subgraphs from an unknown or a + // zero-flow block + if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0) + return false; + + // Do not attempt to process subgraphs from a block w/o unknown sucessors + bool HasUnknownSuccs = false; + for (auto Jump : SrcBlock->SuccJumps) { + if (Func.Blocks[Jump->Target].UnknownWeight) { + HasUnknownSuccs = true; + break; + } + } + if (!HasUnknownSuccs) + return false; + + return true; + } + + /// Find an unknown subgraph starting at block SrcBlock. The method sets + /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks. + void findUnknownSubgraph(const FlowBlock *SrcBlock, + std::vector &KnownDstBlocks, + std::vector &UnknownBlocks) { // Run BFS from SrcBlock and make sure all paths are going through unknown - // blocks and end at a non-unknown DstBlock + // blocks and end at a known DstBlock auto Visited = std::vector(NumBlocks(), false); std::queue Queue; - DstBlock = nullptr; Queue.push(SrcBlock->Index); Visited[SrcBlock->Index] = true; @@ -499,52 +521,105 @@ Queue.pop(); // Process blocks reachable from Block for (auto Jump : Block.SuccJumps) { + // If Jump can be ignored, skip it + if (ignoreJump(SrcBlock, nullptr, Jump)) + continue; + uint64_t Dst = Jump->Target; + // If Dst has been visited, skip Jump if (Visited[Dst]) continue; + // Process block Dst Visited[Dst] = true; if (!Func.Blocks[Dst].UnknownWeight) { - // If we see non-unique non-unknown block reachable from SrcBlock, - // stop processing and skip rebalancing - FlowBlock *CandidateDstBlock = &Func.Blocks[Dst]; - if (DstBlock != nullptr && DstBlock != CandidateDstBlock) - return false; - DstBlock = CandidateDstBlock; + KnownDstBlocks.push_back(&Func.Blocks[Dst]); } else { Queue.push(Dst); - UnknownSuccs.push_back(&Func.Blocks[Dst]); + UnknownBlocks.push_back(&Func.Blocks[Dst]); } } } + } + /// Verify if rebalancing of the subgraph is feasible. If the checks are + /// successful, set the unique destination block, DstBlock (can be null). + bool canRebalanceSubgraph(const FlowBlock *SrcBlock, + const std::vector &KnownDstBlocks, + const std::vector &UnknownBlocks, + FlowBlock *&DstBlock) { // If the list of unknown blocks is empty, we don't need rebalancing - if (UnknownSuccs.empty()) + if (UnknownBlocks.empty()) return false; - // If all reachable nodes from SrcBlock are unknown, skip rebalancing - if (DstBlock == nullptr) + + // If there are multiple known sinks, we can't rebalance + if (KnownDstBlocks.size() > 1) return false; - // If any of the unknown blocks is an exit block, skip rebalancing - for (auto Block : UnknownSuccs) { - if (Block->isExit()) + DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front(); + + // Verify sinks of the subgraph + for (auto Block : UnknownBlocks) { + if (Block->SuccJumps.empty()) { + // If there are multiple (known and unknown) sinks, we can't rebalance + if (DstBlock != nullptr) + return false; + continue; + } + size_t NumIgnoredJumps = 0; + for (auto Jump : Block->SuccJumps) { + if (ignoreJump(SrcBlock, DstBlock, Jump)) + NumIgnoredJumps++; + } + // If there is a non-sink block in UnknownBlocks with all jumps ignored, + // then we can't rebalance + if (NumIgnoredJumps == Block->SuccJumps.size()) return false; } return true; } + /// Decide whether the Jump is ignored while processing an unknown subgraphs + /// rooted at basic block SrcBlock with the destination block, DstBlock. + bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, + const FlowJump *Jump) { + // Ignore unlikely jumps with zero flow + if (Jump->IsUnlikely && Jump->Flow == 0) + return true; + + auto JumpSource = &Func.Blocks[Jump->Source]; + auto JumpTarget = &Func.Blocks[Jump->Target]; + + // Do not ignore jumps coming into DstBlock + if (DstBlock != nullptr && JumpTarget == DstBlock) + return false; + + // Ignore jumps out of SrcBlock to known blocks + if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock) + return true; + + // Ignore jumps to known blocks with zero flow + if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0) + return true; + + return false; + } + /// Verify if the given unknown subgraph is acyclic, and if yes, reorder - /// UnknownSuccs in the topological order (so that all jumps are "forward"). - bool isAcyclicSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock, - std::vector &UnknownSuccs) { + /// UnknownBlocks in the topological order (so that all jumps are "forward"). + bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, + std::vector &UnknownBlocks) { // Extract local in-degrees in the considered subgraph auto LocalInDegree = std::vector(NumBlocks(), 0); - for (auto Jump : SrcBlock->SuccJumps) { - LocalInDegree[Jump->Target]++; - } - for (uint64_t I = 0; I < UnknownSuccs.size(); I++) { - for (auto Jump : UnknownSuccs[I]->SuccJumps) { + auto fillInDegree = [&](const FlowBlock *Block) { + for (auto Jump : Block->SuccJumps) { + if (ignoreJump(SrcBlock, DstBlock, Jump)) + continue; LocalInDegree[Jump->Target]++; } + }; + fillInDegree(SrcBlock); + for (auto Block : UnknownBlocks) { + fillInDegree(Block); } // A loop containing SrcBlock if (LocalInDegree[SrcBlock->Index] > 0) @@ -554,15 +629,20 @@ std::queue Queue; Queue.push(SrcBlock->Index); while (!Queue.empty()) { - auto &Block = Func.Blocks[Queue.front()]; + FlowBlock *Block = &Func.Blocks[Queue.front()]; Queue.pop(); - // Stop propagation once we reach DstBlock - if (Block.Index == DstBlock->Index) + // Stop propagation once we reach DstBlock, if any + if (DstBlock != nullptr && Block == DstBlock) break; - AcyclicOrder.push_back(&Block); + // Keep an acyclic order of unknown blocks + if (Block->UnknownWeight && Block != SrcBlock) + AcyclicOrder.push_back(Block); + // Add to the queue all successors with zero local in-degree - for (auto Jump : Block.SuccJumps) { + for (auto Jump : Block->SuccJumps) { + if (ignoreJump(SrcBlock, DstBlock, Jump)) + continue; uint64_t Dst = Jump->Target; LocalInDegree[Dst]--; if (LocalInDegree[Dst] == 0) { @@ -573,42 +653,69 @@ // If there is a cycle in the subgraph, AcyclicOrder contains only a subset // of all blocks - if (UnknownSuccs.size() + 1 != AcyclicOrder.size()) + if (UnknownBlocks.size() != AcyclicOrder.size()) return false; - UnknownSuccs = AcyclicOrder; + UnknownBlocks = AcyclicOrder; return true; } - /// Rebalance a given subgraph. - void rebalanceUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock, - std::vector &UnknownSuccs) { + /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and + /// having UnknownBlocks intermediate blocks. + void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock, + const FlowBlock *DstBlock, + const std::vector &UnknownBlocks) { assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph"); - assert(UnknownSuccs.front() == SrcBlock && "incorrect order of unknowns"); - for (auto Block : UnknownSuccs) { + // Ditribute flow from the source block + uint64_t BlockFlow = 0; + // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps + for (auto Jump : SrcBlock->SuccJumps) { + if (ignoreJump(SrcBlock, DstBlock, Jump)) + continue; + BlockFlow += Jump->Flow; + } + rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow); + + // Ditribute flow from the remaining blocks + for (auto Block : UnknownBlocks) { + assert(Block->UnknownWeight && "incorrect unknown subgraph"); + uint64_t BlockFlow = 0; // Block's flow is the sum of incoming flows - uint64_t TotalFlow = 0; - if (Block == SrcBlock) { - TotalFlow = Block->Flow; - } else { - for (auto Jump : Block->PredJumps) { - TotalFlow += Jump->Flow; - } - Block->Flow = TotalFlow; + for (auto Jump : Block->PredJumps) { + BlockFlow += Jump->Flow; } + Block->Flow = BlockFlow; + rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow); + } + } - // Process all successor jumps and update corresponding flow values - for (uint64_t I = 0; I < Block->SuccJumps.size(); I++) { - auto Jump = Block->SuccJumps[I]; - if (I + 1 == Block->SuccJumps.size()) { - Jump->Flow = TotalFlow; - continue; - } - uint64_t Flow = uint64_t(TotalFlow * UnknownFirstSuccProbability); - Jump->Flow = Flow; - TotalFlow -= Flow; - } + /// Redistribute flow for a block in a subgraph rooted at SrcBlock, + /// and ending at DstBlock. + void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, + const FlowBlock *Block, uint64_t BlockFlow) { + // Process all successor jumps and update corresponding flow values + size_t BlockDegree = 0; + for (auto Jump : Block->SuccJumps) { + if (ignoreJump(SrcBlock, DstBlock, Jump)) + continue; + BlockDegree++; + } + // If all successor jumps of the block are ignored, skip it + if (DstBlock == nullptr && BlockDegree == 0) + return; + assert(BlockDegree > 0 && "all outgoing jumps are ignored"); + + // Each of the Block's successors gets the following amount of flow. + // Rounding the value up so that all flow is propagated + uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree; + for (auto Jump : Block->SuccJumps) { + if (ignoreJump(SrcBlock, DstBlock, Jump)) + continue; + uint64_t Flow = std::min(SuccFlow, BlockFlow); + Jump->Flow = Flow; + BlockFlow -= Flow; } + assert(BlockFlow == 0 && "not all flow is propagated"); } /// A constant indicating an arbitrary exit block of a function. diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-inference-rebalance-large.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-inference-rebalance-large.prof new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-inference-rebalance-large.prof @@ -0,0 +1,26 @@ +foo1:37078302:0 + 1: 3300 + 2: 0 + 6: 3300 + !CFGChecksum: 157181141624 + +foo2:37078302:0 + 1: 128 + 2: 128 + 3: 128 + 4: 128 + !CFGChecksum: 208782362068 + +foo3:37078302:0 + 1: 500 + 2: 1500 + 4: 1200 + 6: 900 + 9: 500 + !CFGChecksum: 189901498683 + +foo4:37078302:0 + 1: 400 + 3: 400 + 10: 400 + !CFGChecksum: 241030178952 diff --git a/llvm/test/Transforms/SampleProfile/profile-inference-rebalance-large.ll b/llvm/test/Transforms/SampleProfile/profile-inference-rebalance-large.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/profile-inference-rebalance-large.ll @@ -0,0 +1,387 @@ +; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference-rebalance-large.prof | opt -analyze -branch-prob -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference-rebalance-large.prof | opt -analyze -block-freq -enable-new-pm=0 | FileCheck %s --check-prefix=CHECK2 + +; The test verifies that counts can rebalanced in switch statements that contain +; both 'known' and 'unknown' basic blocks. +; +; +---------+ +; +----------------- | b15 [?] | +; | +---------+ +; | ^ +; | | +; | | +; | +---------+ +--------------+ +---------+ +; | | b13 [?] | <-- | b11 [3300] | --> | b14 [?] | +; | +---------+ +--------------+ +---------+ +; | | | | | +; | | | | | +; | | v | | +; | | +---------+ | | +; | | | b12 [0] | | | +; | | +---------+ | | +; | | | | | +; | | | | | +; | | v v | +; | | +--------------+ | +; | +-----------> | | <-----+ +; | | b16 [3300] | +; +----------------> | | +; +--------------+ + +@yydebug = dso_local global i32 0, align 4 + +; Function Attrs: nounwind uwtable +define dso_local i32 @foo1(i32 %0, i32 %1) #0 { +b11: + call void @llvm.pseudoprobe(i64 7682762345278052905, i64 1, i32 0, i64 -1) + %cmp = icmp ne i32 %0, 0 + switch i32 %1, label %b12 [ + i32 1, label %b13 + i32 2, label %b14 + i32 3, label %b15 + i32 4, label %b16 + ] +; CHECK: edge b11 -> b12 probability is 0x00000000 / 0x80000000 = 0.00% +; CHECK: edge b11 -> b13 probability is 0x20000000 / 0x80000000 = 25.00% +; CHECK: edge b11 -> b14 probability is 0x20000000 / 0x80000000 = 25.00% +; CHECK: edge b11 -> b15 probability is 0x20000000 / 0x80000000 = 25.00% +; CHECK: edge b11 -> b16 probability is 0x20000000 / 0x80000000 = 25.00% +; CHECK2: - b11: float = {{.*}}, int = {{.*}}, count = 3300 + +b12: + call void @llvm.pseudoprobe(i64 7682762345278052905, i64 2, i32 0, i64 -1) + br label %b16 +; CHECK2: - b12: float = {{.*}}, int = {{.*}}, count = 0 + +b13: + call void @llvm.pseudoprobe(i64 7682762345278052905, i64 3, i32 0, i64 -1) + br label %b16 +; CHECK2: - b13: float = {{.*}}, int = {{.*}}, count = 825 + +b14: + call void @llvm.pseudoprobe(i64 7682762345278052905, i64 4, i32 0, i64 -1) + br label %b16 +; CHECK2: - b14: float = {{.*}}, int = {{.*}}, count = 825 + +b15: + call void @llvm.pseudoprobe(i64 7682762345278052905, i64 5, i32 0, i64 -1) + br label %b16 +; CHECK2: - b15: float = {{.*}}, int = {{.*}}, count = 825 + +b16: + call void @llvm.pseudoprobe(i64 7682762345278052905, i64 6, i32 0, i64 -1) + ret i32 %1 +; CHECK2: - b16: float = {{.*}}, int = {{.*}}, count = 3300 +} + + +; The test verifies that counts can rebalanced even when control-flow ends at +; a basic block with an unknown count. +; +; +-----------+ +; | b21 [128] | -+ +; +-----------+ | +; | | +; v | +; +-----------+ | +; | b22 [128] | | +; +-----------+ | +; | | +; v | +; +-----------+ | +; +------------ | b23 [128] | <+ +; | +-----------+ +; | | +; v v +; +---------+ +-----------+ +; | b26 [?] | <-- | b24 [128] | +; +---------+ +-----------+ +; | | +; | v +; | +-----------+ +; | | b25 [?] | +; | +-----------+ +; | | +; | v +; | +-----------+ +; +-----------> | b27 [?] | -+ +; +-----------+ | +; | | +; v | +; +-----------+ | +; | b28 [?] | | +; +-----------+ | +; | | +; v | +; +-----------+ | +; | b29 [?] | <+ +; +-----------+ + +define dso_local i32 @foo2(i32 %0, i32 %1) #0 { +b21: + call void @llvm.pseudoprobe(i64 2494702099028631698, i64 1, i32 0, i64 -1) + %cmp = icmp ne i32 %0, 0 + br i1 %cmp, label %b22, label %b23 +; CHECK: edge b21 -> b22 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge b21 -> b23 probability is 0x00000000 / 0x80000000 = 0.00% +; CHECK2: - b21: float = {{.*}}, int = {{.*}}, count = 128 + +b22: + call void @llvm.pseudoprobe(i64 2494702099028631698, i64 2, i32 0, i64 -1) + br label %b23 + +b23: + call void @llvm.pseudoprobe(i64 2494702099028631698, i64 3, i32 0, i64 -1) + br i1 %cmp, label %b24, label %b26 +; CHECK: edge b23 -> b24 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge b23 -> b26 probability is 0x00000000 / 0x80000000 = 0.00% +; CHECK2: - b23: float = {{.*}}, int = {{.*}}, count = 128 + +b24: + call void @llvm.pseudoprobe(i64 2494702099028631698, i64 4, i32 0, i64 -1) + br i1 %cmp, label %b25, label %b26 +; CHECK: edge b24 -> b25 probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge b24 -> b26 probability is 0x40000000 / 0x80000000 = 50.00% + +b25: + call void @llvm.pseudoprobe(i64 2494702099028631698, i64 5, i32 0, i64 -1) + br label %b27 +; CHECK2: - b25: float = {{.*}}, int = {{.*}}, count = 64 + +b26: + call void @llvm.pseudoprobe(i64 2494702099028631698, i64 6, i32 0, i64 -1) + br label %b27 +; CHECK2: - b26: float = {{.*}}, int = {{.*}}, count = 64 + +b27: + call void @llvm.pseudoprobe(i64 2494702099028631698, i64 7, i32 0, i64 -1) + br i1 %cmp, label %b28, label %b29 +; CHECK: edge b27 -> b28 probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge b27 -> b29 probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK2: - b27: float = {{.*}}, int = {{.*}}, count = 128 + +b28: + call void @llvm.pseudoprobe(i64 2494702099028631698, i64 8, i32 0, i64 -1) + br label %b29 +; CHECK2: - b28: float = {{.*}}, int = {{.*}}, count = 64 + +b29: + call void @llvm.pseudoprobe(i64 2494702099028631698, i64 9, i32 0, i64 -1) + ret i32 %1 +; CHECK2: - b29: float = {{.*}}, int = {{.*}}, count = 128 +} + + +; The test verifies a flexible mode of rebalancing in which some jumps to known +; basic blocks are ignored. +; +; +------------+ +; | b31 [500] | +; +------------+ +; | +; v +; +---------+ +------------+ +; | b33 [?] | <-- | b32 [1500] | <-----+ +; +---------+ +------------+ | +; | | | +; | v | +; | +------------+ +-----------+ +; | | b34 [1200] | --> | b36 [900] | +; | +------------+ +-----------+ +; | | +; | v +; | +------------+ +; | | b35 [?] | +; | +------------+ +; | | +; | v +; | +------------+ +; +-----------> | b37 [?] | -+ +; +------------+ | +; | | +; v | +; +------------+ | +; | b38 [?] | | +; +------------+ | +; | | +; v | +; +------------+ | +; | b39 [500] | <+ +; +------------+ +; + +define dso_local i32 @foo3(i32 %0, i32 %1) #0 { +b31: + call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 1, i32 0, i64 -1) + %cmp = icmp ne i32 %0, 0 + br label %b32 +; CHECK2: - b31: float = {{.*}}, int = {{.*}}, count = 500 + +b32: + call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 2, i32 0, i64 -1) + br i1 %cmp, label %b33, label %b34 +; CHECK: edge b32 -> b33 probability is 0x1999999a / 0x80000000 = 20.00% +; CHECK: edge b32 -> b34 probability is 0x66666666 / 0x80000000 = 80.00% +; CHECK2: - b32: float = {{.*}}, int = {{.*}}, count = 1500 + +b33: + call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 3, i32 0, i64 -1) + br label %b37 +; CHECK2: - b33: float = {{.*}}, int = {{.*}}, count = 300 + +b34: + call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 4, i32 0, i64 -1) + br i1 %cmp, label %b35, label %b36 +; CHECK: edge b34 -> b35 probability is 0x15555555 / 0x80000000 = 16.67% +; CHECK: edge b34 -> b36 probability is 0x6aaaaaab / 0x80000000 = 83.33% [HOT edge] +; CHECK2: - b34: float = {{.*}}, int = {{.*}}, count = 1200 + +b35: + call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 5, i32 0, i64 -1) + br label %b37 +; CHECK2: - b35: float = {{.*}}, int = {{.*}}, count = 200 + +b36: + call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 6, i32 0, i64 -1) + br label %b32 +; CHECK2: - b36: float = {{.*}}, int = {{.*}}, count = 1000 + +b37: + call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 7, i32 0, i64 -1) + br i1 %cmp, label %b38, label %b39 +; CHECK: edge b37 -> b38 probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge b37 -> b39 probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK2: - b37: float = {{.*}}, int = {{.*}}, count = 500 + +b38: + call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 8, i32 0, i64 -1) + br label %b39 +; CHECK2: - b38: float = {{.*}}, int = {{.*}}, count = 250 + +b39: + call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 9, i32 0, i64 -1) + ret i32 %1 +; CHECK2: - b39: float = {{.*}}, int = {{.*}}, count = 500 +} + + +; The test verifies that flow rebalancer can ignore 'unlikely' jumps. +; +; +-----------+ +; | b41 [400] | -+ +; +-----------+ | +; | | +; | | +; v | +; +-----------+ | +; | b42 [?] | | +; +-----------+ | +; | | +; | | +; v v +; +---------++---------+ +---------------------------+ +---------++---------+ +; | b48 [?] || b46 [?] | <-- | | --> | b47 [?] || b49 [?] | +; +---------++---------+ | | +---------++---------+ +; | ^ | | | | ^ +; | | | | b43 [400] | | | +; | +-------+-------------| | | | +; | | | | | | +; | | | | ------+----------+ +; | | +---------------------------+ | +; | | | | | +; | | | | | +; | | v v | +; | | +-----------+ +---------+ | +; | | | b44 [?] | | b45 [?] | | +; | | +-----------+ +---------+ | +; | | | | | +; | | | | | +; | | v v | +; | | +---------------------------+ | +; | +-----------> | | <-----+ +; | | b410 [400] | +; | | | +; +----------------------> | | +; +---------------------------+ + + +define dso_local void @foo4(i32 %0, i32 %1) #0 { +b41: + call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 1, i32 0, i64 -1) + %cmp = icmp ne i32 %0, 0 + br i1 %cmp, label %b42, label %b43 +; CHECK: edge b41 -> b42 probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge b41 -> b43 probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK2: - b41: float = {{.*}}, int = {{.*}}, count = 400 + +b42: + call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 2, i32 0, i64 -1) + br label %b43 +; CHECK2: - b42: float = {{.*}}, int = {{.*}}, count = 200 + +b43: + call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 3, i32 0, i64 -1) + switch i32 %1, label %b49 [ + i32 1, label %b44 + i32 2, label %b45 + i32 3, label %b46 + i32 4, label %b47 + i32 5, label %b48 + ] +; CHECK: edge b43 -> b49 probability is 0x00000000 / 0x80000000 = 0.00% +; CHECK: edge b43 -> b44 probability is 0x1999999a / 0x80000000 = 20.00% +; CHECK: edge b43 -> b45 probability is 0x1999999a / 0x80000000 = 20.00% +; CHECK: edge b43 -> b46 probability is 0x1999999a / 0x80000000 = 20.00% +; CHECK: edge b43 -> b47 probability is 0x1999999a / 0x80000000 = 20.00% +; CHECK: edge b43 -> b48 probability is 0x1999999a / 0x80000000 = 20.00% +; CHECK2: - b43: float = {{.*}}, int = {{.*}}, count = 400 + +b44: + call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 4, i32 0, i64 -1) + br label %b410 +; CHECK2: - b44: float = {{.*}}, int = {{.*}}, count = 80 + +b45: + call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 5, i32 0, i64 -1) + br label %b410 +; CHECK2: - b45: float = {{.*}}, int = {{.*}}, count = 80 + +b46: + call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 6, i32 0, i64 -1) + br label %b410 +; CHECK2: - b46: float = {{.*}}, int = {{.*}}, count = 80 + +b47: + call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 7, i32 0, i64 -1) + br label %b410 +; CHECK2: - b47: float = {{.*}}, int = {{.*}}, count = 80 + +b48: + call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 8, i32 0, i64 -1) + br label %b410 +; CHECK2: - b48: float = {{.*}}, int = {{.*}}, count = 80 + +b49: + call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 9, i32 0, i64 -1) + unreachable +; CHECK2: - b49: float = {{.*}}, int = {{.*}}, count = 0 + +b410: + call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 10, i32 0, i64 -1) + ret void +; CHECK2: - b410: float = {{.*}}, int = {{.*}}, count = 400 +} + + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #4 + +attributes #0 = { noinline nounwind uwtable "use-sample-profile" } +attributes #4 = { inaccessiblememonly nounwind willreturn } + +!llvm.pseudo_probe_desc = !{!7, !8, !9, !10} + +!7 = !{i64 7682762345278052905, i64 157181141624, !"foo1", null} +!8 = !{i64 2494702099028631698, i64 208782362068, !"foo2", null} +!9 = !{i64 -7908226060800700466, i64 189901498683, !"foo3", null} +!10 = !{i64 -6882312132165544686, i64 241030178952, !"foo4", null} diff --git a/llvm/test/Transforms/SampleProfile/profile-inference-rebalance.ll b/llvm/test/Transforms/SampleProfile/profile-inference-rebalance.ll --- a/llvm/test/Transforms/SampleProfile/profile-inference-rebalance.ll +++ b/llvm/test/Transforms/SampleProfile/profile-inference-rebalance.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference-rebalance.prof | opt -analyze -branch-prob -enable-new-pm=0 | FileCheck %s ; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference-rebalance.prof | opt -analyze -block-freq -enable-new-pm=0 | FileCheck %s --check-prefix=CHECK2 -; The test contains a "dimanond" and a "triangle" that needs to be rebalanced +; The test contains a "diamond" and a "triangle" that needs to be rebalanced ; after basic profile inference. ; ; +----------------+