diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h --- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -27,6 +27,7 @@ #include #include #include +#include #include namespace llvm { @@ -35,6 +36,7 @@ class Loop; class LoopInfo; class raw_ostream; +class DominatorTree; class PostDominatorTree; class TargetLibraryInfo; class Value; @@ -51,20 +53,79 @@ /// identify an edge, since we can have multiple edges from Src to Dst. /// As an example, we can have a switch which jumps to Dst with value 0 and /// value 10. +/// +/// Process of computing branch probabilities can be logically viewed as three +/// step process: +/// +/// First, if there is a profile information associated with the branch then +/// it is trivially translated to branch probabilities. There is one exception +/// from this rule though. Probabilities for edges leading to "unreachable" +/// blocks (blocks with the estimated weight not greater than +/// UNREACHABLE_WEIGHT) are evaluated according to static estimation and +/// override profile information. If no branch probabilities were calculated +/// on this step then take the next one. +/// +/// Second, estimate absolute execution weights for each block based on +/// statically known information. Roots of such information are "cold", +/// "unreachable", "noreturn" and "unwind" blocks. Those blocks get their +/// weights set to BlockExecWeight::COLD, BlockExecWeight::UNREACHABLE, +/// BlockExecWeight::NORETURN and BlockExecWeight::UNWIND respectively. Then the +/// weights are propagated to the other blocks up the domination line. In +/// addition, if all successors have estimated weights set then maximum of these +/// weights assigned to the block itself (while this is not ideal heuristic in +/// theory it's simple and works reasonably well in most cases) and the process +/// repeats. 
Once the process of weights propagation converges branch +/// probabilities are set for all such branches that have at least one successor +/// with the weight set. Default execution weight (BlockExecWeight::DEFAULT) is +/// used for any successors which don't have their weight set. For loop back +/// branches we use their weights scaled by loop trip count equal to +/// 'LBH_TAKEN_WEIGHT/LBH_NONTAKEN_WEIGHT'. +/// +/// Here is a simple example demonstrating how the described algorithm works. +/// +/// BB1 +/// / \ +/// v v +/// BB2 BB3 +/// / \ +/// v v +/// ColdBB UnreachBB +/// +/// Initially, ColdBB is associated with COLD_WEIGHT and UnreachBB with +/// UNREACHABLE_WEIGHT. COLD_WEIGHT is set to BB2 as maximum between its +/// successors. BB1 and BB3 have no explicit estimated weights and are assumed to +/// have DEFAULT_WEIGHT. Based on assigned weights branches will have the +/// following probabilities: +/// P(BB1->BB2) = COLD_WEIGHT/(COLD_WEIGHT + DEFAULT_WEIGHT) = +/// 0xffff / (0xffff + 0xfffff) = 0.0588(5.9%) +/// P(BB1->BB3) = DEFAULT_WEIGHT/(COLD_WEIGHT + DEFAULT_WEIGHT) = +/// 0xfffff / (0xffff + 0xfffff) = 0.941(94.1%) +/// P(BB2->ColdBB) = COLD_WEIGHT/(COLD_WEIGHT + UNREACHABLE_WEIGHT) = 1(100%) +/// P(BB2->UnreachBB) = +/// UNREACHABLE_WEIGHT/(COLD_WEIGHT+UNREACHABLE_WEIGHT) = 0(0%) +/// +/// If no branch probabilities were calculated on this step then take the next +/// one. +/// +/// Third, apply different kinds of local heuristics for each individual +/// branch until first match. For example probability of a pointer to be null is +/// estimated as PH_TAKEN_WEIGHT/(PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT). If +/// no local heuristic has been matched then branch is left with no explicit +/// probability set and assumed to have default probability. 
class BranchProbabilityInfo { public: BranchProbabilityInfo() = default; BranchProbabilityInfo(const Function &F, const LoopInfo &LI, const TargetLibraryInfo *TLI = nullptr, + DominatorTree *DT = nullptr, PostDominatorTree *PDT = nullptr) { - calculate(F, LI, TLI, PDT); + calculate(F, LI, TLI, DT, PDT); } BranchProbabilityInfo(BranchProbabilityInfo &&Arg) : Probs(std::move(Arg.Probs)), LastF(Arg.LastF), - PostDominatedByUnreachable(std::move(Arg.PostDominatedByUnreachable)), - PostDominatedByColdCall(std::move(Arg.PostDominatedByColdCall)) {} + EstimatedBlockWeight(std::move(Arg.EstimatedBlockWeight)) {} BranchProbabilityInfo(const BranchProbabilityInfo &) = delete; BranchProbabilityInfo &operator=(const BranchProbabilityInfo &) = delete; @@ -72,8 +133,7 @@ BranchProbabilityInfo &operator=(BranchProbabilityInfo &&RHS) { releaseMemory(); Probs = std::move(RHS.Probs); - PostDominatedByColdCall = std::move(RHS.PostDominatedByColdCall); - PostDominatedByUnreachable = std::move(RHS.PostDominatedByUnreachable); + EstimatedBlockWeight = std::move(RHS.EstimatedBlockWeight); return *this; } @@ -143,11 +203,13 @@ } void calculate(const Function &F, const LoopInfo &LI, - const TargetLibraryInfo *TLI, PostDominatorTree *PDT); + const TargetLibraryInfo *TLI, DominatorTree *DT, + PostDominatorTree *PDT); /// Forget analysis results for the given basic block. void eraseBlock(const BasicBlock *BB); + // Data structure to track SCCs for handling irreducible loops. class SccInfo { // Enum of types to classify basic blocks in SCC. Basic block belonging to // SCC is 'Inner' until it is either 'Header' or 'Exiting'. 
Note that a @@ -236,6 +298,8 @@ const SccInfo &SccI); const BasicBlock *getBlock() const { return BB; } + BasicBlock *getBlock() { return const_cast(BB); } + LoopData getLoopData() const { return LD; } Loop *getLoop() const { return LD.first; } int getSccNum() const { return LD.second; } @@ -249,6 +313,7 @@ const BasicBlock *const BB = nullptr; LoopData LD = {nullptr, -1}; }; + // Pair of LoopBlocks representing an edge from first to second block. using LoopEdge = std::pair; @@ -258,27 +323,26 @@ // a pair (PredBlock and an index in the successors) to specify an edge. using Edge = std::pair; - // Default weight value. Used when we don't have information about the edge. - // TODO: DEFAULT_WEIGHT makes sense during static predication, when none of - // the successors have a weight yet. But it doesn't make sense when providing - // weight to an edge that may have siblings with non-zero weights. This can - // be handled various ways, but it's probably fine for an edge with unknown - // weight to just "inherit" the non-zero weight of an adjacent successor. - static const uint32_t DEFAULT_WEIGHT = 16; - DenseMap Probs; /// Track the last function we run over for printing. const Function *LastF = nullptr; + const LoopInfo *LI = nullptr; + /// Keeps information about all SCCs in a function. std::unique_ptr SccI; - /// Track the set of blocks directly succeeded by a returning block. - SmallPtrSet PostDominatedByUnreachable; + /// Keeps mapping of a basic block to its estimated weight. + SmallDenseMap EstimatedBlockWeight; + + /// Keeps mapping of a loop to estimated weight to enter the loop. + SmallDenseMap EstimatedLoopWeight; - /// Track the set of blocks that always lead to a cold call. - SmallPtrSet PostDominatedByColdCall; + /// Helper to construct LoopBlock for \p BB. 
+ LoopBlock getLoopBlock(const BasicBlock *BB) const { + return LoopBlock(BB, *LI, *SccI.get()); + } /// Returns true if destination block belongs to some loop and source block is /// either doesn't belong to any loop or belongs to a loop which is not inner @@ -301,18 +365,55 @@ void getLoopExitBlocks(const LoopBlock &LB, SmallVectorImpl &Exits) const; - void computePostDominatedByUnreachable(const Function &F, - PostDominatorTree *PDT); - void computePostDominatedByColdCall(const Function &F, - PostDominatorTree *PDT); - bool calcUnreachableHeuristics(const BasicBlock *BB); + /// Returns estimated weight for \p BB. None if \p BB has no estimated weight. + Optional getEstimatedBlockWeight(const BasicBlock *BB) const; + + /// Returns estimated weight to enter \p L. In other words it is weight of + /// loop's header block not scaled by trip count. Returns None if \p L has no + /// no estimated weight. + Optional getEstimatedLoopWeight(const LoopData &L) const; + + /// Return estimated weight for \p Edge. Returns None if estimated weight is + /// unknown. + Optional getEstimatedEdgeWeight(const LoopEdge &Edge) const; + + /// Iterates over all edges leading from \p SrcBB to \p Successors and + /// returns maximum of all estimated weights. If at least one edge has unknown + /// estimated weight None is returned. + template + Optional + getMaxEstimatedEdgeWeight(const LoopBlock &SrcBB, + iterator_range Successors) const; + + /// If \p LoopBB has no estimated weight then set it to \p BBWeight and + /// return true. Otherwise \p BB's weight remains unchanged and false is + /// returned. In addition all blocks/loops that might need their weight to be + /// re-estimated are put into BlockWorkList/LoopWorkList. + bool updateEstimatedBlockWeight(LoopBlock &LoopBB, uint32_t BBWeight, + SmallVectorImpl &BlockWorkList, + SmallVectorImpl &LoopWorkList); + + /// Starting from \p LoopBB (including \p LoopBB itself) propagate \p BBWeight + /// up the domination tree. 
+ void propagateEstimatedBlockWeight(const LoopBlock &LoopBB, DominatorTree *DT, + PostDominatorTree *PDT, uint32_t BBWeight, + SmallVectorImpl &WorkList, + SmallVectorImpl &LoopWorkList); + + /// Returns block's weight encoded in the IR. + Optional getInitialEstimatedBlockWeight(const BasicBlock *BB); + + // Computes estimated weights for all blocks in \p F. + void computeEestimateBlockWeight(const Function &F, DominatorTree *DT, + PostDominatorTree *PDT); + + /// Based on computed weights by \p computeEstimatedBlockWeight set + /// probabilities on branches. + bool calcEstimatedHeuristics(const BasicBlock *BB); bool calcMetadataWeights(const BasicBlock *BB); - bool calcColdCallHeuristics(const BasicBlock *BB); bool calcPointerHeuristics(const BasicBlock *BB); - bool calcLoopBranchHeuristics(const BasicBlock *BB, const LoopInfo &LI); bool calcZeroHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI); bool calcFloatingPointHeuristics(const BasicBlock *BB); - bool calcInvokeHeuristics(const BasicBlock *BB); }; /// Analysis pass which computes \c BranchProbabilityInfo. 
diff --git a/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h b/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h --- a/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h @@ -63,7 +63,7 @@ BranchProbabilityInfo &getCalculated() { if (!Calculated) { assert(F && LI && "call setAnalysis"); - BPI.calculate(*F, *LI, TLI, nullptr); + BPI.calculate(*F, *LI, TLI, nullptr, nullptr); Calculated = true; } return BPI; diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp --- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -61,6 +61,7 @@ "Branch Probability Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) @@ -95,8 +96,6 @@ // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 static const uint32_t LBH_TAKEN_WEIGHT = 124; static const uint32_t LBH_NONTAKEN_WEIGHT = 4; -// Unlikely edges within a loop are half as likely as other edges -static const uint32_t LBH_UNLIKELY_WEIGHT = 62; /// Unreachable-terminating branch taken probability. /// @@ -105,20 +104,6 @@ /// All reachable probability will proportionally share the remaining part. static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1); -/// Weight for a branch taken going into a cold block. -/// -/// This is the weight for a branch taken toward a block marked -/// cold. A block is marked cold if it's postdominated by a -/// block containing a call to a cold function. Cold functions -/// are those marked with attribute 'cold'. -static const uint32_t CC_TAKEN_WEIGHT = 4; - -/// Weight for a branch not-taken into a cold block. 
-/// -/// This is the weight for a branch not taken toward a block marked -/// cold. -static const uint32_t CC_NONTAKEN_WEIGHT = 64; - static const uint32_t PH_TAKEN_WEIGHT = 20; static const uint32_t PH_NONTAKEN_WEIGHT = 12; @@ -135,18 +120,26 @@ /// exceptional case, so the result is unlikely. static const uint32_t FPH_UNO_WEIGHT = 1; -/// Invoke-terminating normal branch taken weight -/// -/// This is the weight for branching to the normal destination of an invoke -/// instruction. We expect this to happen most of the time. Set the weight to an -/// absurdly high value so that nested loops subsume it. -static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; - -/// Invoke-terminating normal branch not-taken weight. -/// -/// This is the weight for branching to the unwind destination of an invoke -/// instruction. This is essentially never taken. -static const uint32_t IH_NONTAKEN_WEIGHT = 1; +/// Set of dedicated "absolute" execution weights for a block. These weights are +/// meaningful relative to each other and their derivatives only. +enum class BlockExecWeight : std::uint32_t { + /// Special weight used for cases with exact zero probability. + ZERO = 0x0, + /// Minimal possible non zero weight. + LOWEST_NON_ZERO = 0x1, + /// Weight to an 'unreachable' block. + UNREACHABLE = ZERO, + /// Weight to a block containing non returning call. + NORETURN = LOWEST_NON_ZERO, + /// Weight to 'unwind' block of an invoke instruction. + UNWIND = LOWEST_NON_ZERO, + /// Weight to a 'cold' block. Cold blocks are the ones containing calls marked + /// with attribute 'cold'. + COLD = 0xffff, + /// Default weight is used in cases when there is no dedicated execution + /// weight set. It is not propagated through the domination line either. + DEFAULT = 0xfffff +}; BranchProbabilityInfo::SccInfo::SccInfo(const Function &F) { // Record SCC numbers of blocks in the CFG to identify irreducible loops. 
@@ -306,133 +299,6 @@ } } -static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT, - SmallVectorImpl &WorkList, - SmallPtrSetImpl &TargetSet) { - SmallVector Descendants; - SmallPtrSet NewItems; - - PDT->getDescendants(const_cast(BB), Descendants); - for (auto *BB : Descendants) - if (TargetSet.insert(BB).second) - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - if (!TargetSet.count(*PI)) - NewItems.insert(*PI); - WorkList.insert(WorkList.end(), NewItems.begin(), NewItems.end()); -} - -/// Compute a set of basic blocks that are post-dominated by unreachables. -void BranchProbabilityInfo::computePostDominatedByUnreachable( - const Function &F, PostDominatorTree *PDT) { - SmallVector WorkList; - for (auto &BB : F) { - const Instruction *TI = BB.getTerminator(); - if (TI->getNumSuccessors() == 0) { - if (isa(TI) || - // If this block is terminated by a call to - // @llvm.experimental.deoptimize then treat it like an unreachable - // since the @llvm.experimental.deoptimize call is expected to - // practically never execute. - BB.getTerminatingDeoptimizeCall()) - UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByUnreachable); - } - } - - while (!WorkList.empty()) { - const BasicBlock *BB = WorkList.pop_back_val(); - if (PostDominatedByUnreachable.count(BB)) - continue; - // If the terminator is an InvokeInst, check only the normal destination - // block as the unwind edge of InvokeInst is also very unlikely taken. - if (auto *II = dyn_cast(BB->getTerminator())) { - if (PostDominatedByUnreachable.count(II->getNormalDest())) - UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); - } - // If all the successors are unreachable, BB is unreachable as well. 
- else if (!successors(BB).empty() && - llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { - return PostDominatedByUnreachable.count(Succ); - })) - UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); - } -} - -/// compute a set of basic blocks that are post-dominated by ColdCalls. -void BranchProbabilityInfo::computePostDominatedByColdCall( - const Function &F, PostDominatorTree *PDT) { - SmallVector WorkList; - for (auto &BB : F) - for (auto &I : BB) - if (const CallInst *CI = dyn_cast(&I)) - if (CI->hasFnAttr(Attribute::Cold)) - UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByColdCall); - - while (!WorkList.empty()) { - const BasicBlock *BB = WorkList.pop_back_val(); - - // If the terminator is an InvokeInst, check only the normal destination - // block as the unwind edge of InvokeInst is also very unlikely taken. - if (auto *II = dyn_cast(BB->getTerminator())) { - if (PostDominatedByColdCall.count(II->getNormalDest())) - UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); - } - // If all of successor are post dominated then BB is also done. - else if (!successors(BB).empty() && - llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { - return PostDominatedByColdCall.count(Succ); - })) - UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); - } -} - -/// Calculate edge weights for successors lead to unreachable. -/// -/// Predict that a successor which leads necessarily to an -/// unreachable-terminated block as extremely unlikely. 
-bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { - const Instruction *TI = BB->getTerminator(); - (void) TI; - assert(TI->getNumSuccessors() > 1 && "expected more than one successor!"); - assert(!isa(TI) && - "Invokes should have already been handled by calcInvokeHeuristics"); - - SmallVector UnreachableEdges; - SmallVector ReachableEdges; - - for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) - if (PostDominatedByUnreachable.count(*I)) - UnreachableEdges.push_back(I.getSuccessorIndex()); - else - ReachableEdges.push_back(I.getSuccessorIndex()); - - // Skip probabilities if all were reachable. - if (UnreachableEdges.empty()) - return false; - - SmallVector EdgeProbabilities( - BB->getTerminator()->getNumSuccessors(), BranchProbability::getUnknown()); - if (ReachableEdges.empty()) { - BranchProbability Prob(1, UnreachableEdges.size()); - for (unsigned SuccIdx : UnreachableEdges) - EdgeProbabilities[SuccIdx] = Prob; - setEdgeProbability(BB, EdgeProbabilities); - return true; - } - - auto UnreachableProb = UR_TAKEN_PROB; - auto ReachableProb = - (BranchProbability::getOne() - UR_TAKEN_PROB * UnreachableEdges.size()) / - ReachableEdges.size(); - - for (unsigned SuccIdx : UnreachableEdges) - EdgeProbabilities[SuccIdx] = UnreachableProb; - for (unsigned SuccIdx : ReachableEdges) - EdgeProbabilities[SuccIdx] = ReachableProb; - - setEdgeProbability(BB, EdgeProbabilities); - return true; -} - // Propagate existing explicit probabilities from either profile data or // 'expect' intrinsic processing. Examine metadata against unreachable // heuristic. 
The probability of the edge coming to unreachable block is @@ -473,7 +339,12 @@ "Too many bits for uint32_t"); Weights.push_back(Weight->getZExtValue()); WeightSum += Weights.back(); - if (PostDominatedByUnreachable.count(TI->getSuccessor(I - 1))) + const LoopBlock SrcLoopBB = getLoopBlock(BB); + const LoopBlock DstLoopBB = getLoopBlock(TI->getSuccessor(I - 1)); + auto EstimatedWeight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB}); + if (EstimatedWeight && + EstimatedWeight.getValue() <= + static_cast(BlockExecWeight::UNREACHABLE)) UnreachableIdxs.push_back(I - 1); else ReachableIdxs.push_back(I - 1); @@ -578,60 +449,6 @@ return true; } -/// Calculate edge weights for edges leading to cold blocks. -/// -/// A cold block is one post-dominated by a block with a call to a -/// cold function. Those edges are unlikely to be taken, so we give -/// them relatively low weight. -/// -/// Return true if we could compute the weights for cold edges. -/// Return false, otherwise. -bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) { - const Instruction *TI = BB->getTerminator(); - (void) TI; - assert(TI->getNumSuccessors() > 1 && "expected more than one successor!"); - assert(!isa(TI) && - "Invokes should have already been handled by calcInvokeHeuristics"); - - // Determine which successors are post-dominated by a cold block. - SmallVector ColdEdges; - SmallVector NormalEdges; - for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) - if (PostDominatedByColdCall.count(*I)) - ColdEdges.push_back(I.getSuccessorIndex()); - else - NormalEdges.push_back(I.getSuccessorIndex()); - - // Skip probabilities if no cold edges. 
- if (ColdEdges.empty()) - return false; - - SmallVector EdgeProbabilities( - BB->getTerminator()->getNumSuccessors(), BranchProbability::getUnknown()); - if (NormalEdges.empty()) { - BranchProbability Prob(1, ColdEdges.size()); - for (unsigned SuccIdx : ColdEdges) - EdgeProbabilities[SuccIdx] = Prob; - setEdgeProbability(BB, EdgeProbabilities); - return true; - } - - auto ColdProb = BranchProbability::getBranchProbability( - CC_TAKEN_WEIGHT, - (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(ColdEdges.size())); - auto NormalProb = BranchProbability::getBranchProbability( - CC_NONTAKEN_WEIGHT, - (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(NormalEdges.size())); - - for (unsigned SuccIdx : ColdEdges) - EdgeProbabilities[SuccIdx] = ColdProb; - for (unsigned SuccIdx : NormalEdges) - EdgeProbabilities[SuccIdx] = NormalProb; - - setEdgeProbability(BB, EdgeProbabilities); - return true; -} - // Calculate Edge Weights using "Pointer Heuristics". Predict a comparison // between two pointer or pointer and NULL will fail. bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) { @@ -775,81 +592,324 @@ } } -// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges -// as taken, exiting edges as not-taken. 
-bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB, - const LoopInfo &LI) { - LoopBlock LB(BB, LI, *SccI.get()); - if (!LB.belongsToLoop()) +Optional +BranchProbabilityInfo::getEstimatedBlockWeight(const BasicBlock *BB) const { + auto WeightIt = EstimatedBlockWeight.find(BB); + if (WeightIt == EstimatedBlockWeight.end()) + return None; + return WeightIt->second; +} + +Optional +BranchProbabilityInfo::getEstimatedLoopWeight(const LoopData &L) const { + auto WeightIt = EstimatedLoopWeight.find(L); + if (WeightIt == EstimatedLoopWeight.end()) + return None; + return WeightIt->second; +} + +Optional +BranchProbabilityInfo::getEstimatedEdgeWeight(const LoopEdge &Edge) const { + // For edges entering a loop take weight of a loop rather than an individual + // block in the loop. + return isLoopEnteringEdge(Edge) + ? getEstimatedLoopWeight(Edge.second.getLoopData()) + : getEstimatedBlockWeight(Edge.second.getBlock()); +} + +template +Optional BranchProbabilityInfo::getMaxEstimatedEdgeWeight( + const LoopBlock &SrcLoopBB, iterator_range Successors) const { + SmallVector Weights; + Optional MaxWeight; + for (const BasicBlock *DstBB : Successors) { + const LoopBlock DstLoopBB = getLoopBlock(DstBB); + auto Weight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB}); + + if (!Weight) + return None; + + if (!MaxWeight || MaxWeight.getValue() < Weight.getValue()) + MaxWeight = Weight; + } + + return MaxWeight; +} + +// Updates \p LoopBB's weight and returns true. If \p LoopBB has already +// an associated weight it is unchanged and false is returned. +// +// Please note by the algorithm the weight is not expected to change once set +// thus 'false' status is used to track visited blocks. 
+bool BranchProbabilityInfo::updateEstimatedBlockWeight( + LoopBlock &LoopBB, uint32_t BBWeight, + SmallVectorImpl &BlockWorkList, + SmallVectorImpl &LoopWorkList) { + BasicBlock *BB = LoopBB.getBlock(); + + // In general, weight is assigned to a block when it has final value and + // can't/shouldn't be changed. However, there are cases when a block + // inherently has several (possibly "contradicting") weights. For example, + // "unwind" block may also contain "cold" call. In that case the first + // set weight is favored and all consequent weights are ignored. + if (!EstimatedBlockWeight.insert({BB, BBWeight}).second) return false; - SmallPtrSet UnlikelyBlocks; - if (LB.getLoop()) - computeUnlikelySuccessors(BB, LB.getLoop(), UnlikelyBlocks); + for (BasicBlock *PredBlock : predecessors(BB)) { + LoopBlock PredLoop = getLoopBlock(PredBlock); + // Add affected block/loop to a working list. + if (isLoopExitingEdge({PredLoop, LoopBB})) { + if (!EstimatedLoopWeight.count(PredLoop.getLoopData())) + LoopWorkList.push_back(PredLoop); + } else if (!EstimatedBlockWeight.count(PredBlock)) + BlockWorkList.push_back(PredBlock); + } + return true; +} - SmallVector BackEdges; - SmallVector ExitingEdges; - SmallVector InEdges; // Edges from header to the loop. - SmallVector UnlikelyEdges; +// Starting from \p BB traverse through dominator blocks and assign \p BBWeight +// to all such blocks that are post dominated by \BB. In other words to all +// blocks that the one is executed if and only if another one is executed. +// Importantly, we skip loops here for two reasons. First weights of blocks in +// a loop should be scaled by trip count (yet possibly unknown). Second there is +// no any value in doing that because that doesn't give any additional +// information regarding distribution of probabilities inside the loop. +// Exception is loop 'enter' and 'exit' edges that are handled in a special way +// at calcEstimatedHeuristics. 
+// +// In addition, \p WorkList is populated with basic blocks if at least one +// successor has updated estimated weight. +void BranchProbabilityInfo::propagateEstimatedBlockWeight( + const LoopBlock &LoopBB, DominatorTree *DT, PostDominatorTree *PDT, + uint32_t BBWeight, SmallVectorImpl &BlockWorkList, + SmallVectorImpl &LoopWorkList) { + const BasicBlock *BB = LoopBB.getBlock(); + const auto *DTStartNode = DT->getNode(BB); + const auto *PDTStartNode = PDT->getNode(BB); + + // TODO: Consider propagating weight down the domination line as well. + for (const auto *DTNode = DTStartNode; DTNode != nullptr; + DTNode = DTNode->getIDom()) { + auto *DomBB = DTNode->getBlock(); + // Consider blocks which lie on one 'line'. + if (!PDT->dominates(PDTStartNode, PDT->getNode(DomBB))) + // If BB doesn't post dominate DomBB it will not post dominate dominators + // of DomBB as well. + break; - for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - LoopBlock SuccLB(*I, LI, *SccI.get()); - LoopEdge Edge(LB, SuccLB); - bool IsUnlikelyEdge = LB.getLoop() && UnlikelyBlocks.contains(*I); - - if (IsUnlikelyEdge) - UnlikelyEdges.push_back(I.getSuccessorIndex()); - else if (isLoopExitingEdge(Edge)) - ExitingEdges.push_back(I.getSuccessorIndex()); - else if (isLoopBackEdge(Edge)) - BackEdges.push_back(I.getSuccessorIndex()); - else { - InEdges.push_back(I.getSuccessorIndex()); + LoopBlock DomLoopBB = getLoopBlock(DomBB); + const LoopEdge Edge{DomLoopBB, LoopBB}; + // Don't propagate weight to blocks belonging to different loops. + if (!isLoopEnteringExitingEdge(Edge)) { + if (!updateEstimatedBlockWeight(DomLoopBB, BBWeight, BlockWorkList, + LoopWorkList)) + // If DomBB has weight set then all its predecessors are already + // processed (since we propagate weight up to the top of IR each time). 
+ break; + } else if (isLoopExitingEdge(Edge)) { + LoopWorkList.push_back(DomLoopBB); } } +} + +Optional BranchProbabilityInfo::getInitialEstimatedBlockWeight( + const BasicBlock *BB) { + // Returns true if \p BB has call marked with "NoReturn" attribute. + auto hasNoReturn = [&](const BasicBlock *BB) { + for (const auto &I : reverse(*BB)) + if (const CallInst *CI = dyn_cast(&I)) + if (CI->hasFnAttr(Attribute::NoReturn)) + return true; - if (BackEdges.empty() && ExitingEdges.empty() && UnlikelyEdges.empty()) return false; + }; + + // Important note regarding the order of checks. They are ordered by weight + // from lowest to highest. Doing that allows to avoid "unstable" results + // when several conditions heuristics can be applied simultaneously. + if (isa(BB->getTerminator()) || + // If this block is terminated by a call to + // @llvm.experimental.deoptimize then treat it like an unreachable + // since it is expected to practically never execute. + // TODO: Should we actually treat as never returning call? + BB->getTerminatingDeoptimizeCall()) + return hasNoReturn(BB) + ? static_cast(BlockExecWeight::NORETURN) + : static_cast(BlockExecWeight::UNREACHABLE); + + // Check if the block is 'unwind' handler of some invoke instruction. + for (const auto *Pred : predecessors(BB)) + if (Pred) + if (const auto *II = dyn_cast(Pred->getTerminator())) + if (II->getUnwindDest() == BB) + return static_cast(BlockExecWeight::UNWIND); + + // Check if the block contains 'cold' call. + for (const auto &I : *BB) + if (const CallInst *CI = dyn_cast(&I)) + if (CI->hasFnAttr(Attribute::Cold)) + return static_cast(BlockExecWeight::COLD); + + return None; +} - // Collect the sum of probabilities of back-edges/in-edges/exiting-edges, and - // normalize them so that they sum up to one. - unsigned Denom = (BackEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) + - (InEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) + - (UnlikelyEdges.empty() ? 0 : LBH_UNLIKELY_WEIGHT) + - (ExitingEdges.empty() ? 
0 : LBH_NONTAKEN_WEIGHT); +// Does RPO traversal over all blocks in \p F and assigns weights to +// 'unreachable', 'noreturn', 'cold', 'unwind' blocks. In addition it does its +// best to propagate the weight to up/down the IR. +void BranchProbabilityInfo::computeEestimateBlockWeight( + const Function &F, DominatorTree *DT, PostDominatorTree *PDT) { + SmallVector BlockWorkList; + SmallVector LoopWorkList; + + // By doing RPO we make sure that all predecessors already have weights + // calculated before visiting theirs successors. + ReversePostOrderTraversal RPOT(&F); + for (const auto *BB : RPOT) + if (auto BBWeight = getInitialEstimatedBlockWeight(BB)) + // If we were able to find estimated weight for the block set it to this + // block and propagate up the IR. + propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT, + BBWeight.getValue(), BlockWorkList, + LoopWorkList); + + // BlockWorklist/LoopWorkList contains blocks/loops with at least one + // successor/exit having estimated weight. Try to propagate weight to such + // blocks/loops from successors/exits. + // Process loops and blocks. Order is not important. 
+ do { + while (!LoopWorkList.empty()) { + const LoopBlock LoopBB = LoopWorkList.pop_back_val(); + + if (EstimatedLoopWeight.count(LoopBB.getLoopData())) + continue; - SmallVector EdgeProbabilities( - BB->getTerminator()->getNumSuccessors(), BranchProbability::getUnknown()); - if (uint32_t numBackEdges = BackEdges.size()) { - BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom); - auto Prob = TakenProb / numBackEdges; - for (unsigned SuccIdx : BackEdges) - EdgeProbabilities[SuccIdx] = Prob; - } + SmallVector Exits; + getLoopExitBlocks(LoopBB, Exits); + auto LoopWeight = getMaxEstimatedEdgeWeight( + LoopBB, make_range(Exits.begin(), Exits.end())); - if (uint32_t numInEdges = InEdges.size()) { - BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom); - auto Prob = TakenProb / numInEdges; - for (unsigned SuccIdx : InEdges) - EdgeProbabilities[SuccIdx] = Prob; - } + if (LoopWeight) { + // If we never exit the loop then we can enter it once at maximum. + if (LoopWeight <= static_cast(BlockExecWeight::UNREACHABLE)) + LoopWeight = static_cast(BlockExecWeight::LOWEST_NON_ZERO); + + EstimatedLoopWeight.insert( + {LoopBB.getLoopData(), LoopWeight.getValue()}); + // Add all blocks entering the loop into working list. + getLoopEnterBlocks(LoopBB, BlockWorkList); + } + } - if (uint32_t numExitingEdges = ExitingEdges.size()) { - BranchProbability NotTakenProb = BranchProbability(LBH_NONTAKEN_WEIGHT, - Denom); - auto Prob = NotTakenProb / numExitingEdges; - for (unsigned SuccIdx : ExitingEdges) - EdgeProbabilities[SuccIdx] = Prob; + while (!BlockWorkList.empty()) { + // We can reach here only if BlockWorkList is not empty. + const BasicBlock *BB = BlockWorkList.pop_back_val(); + if (EstimatedBlockWeight.count(BB)) + continue; + + // We take maximum over all weights of successors. In other words we take + // weight of "hot" path. 
In theory we can probably find a better function + // which gives higher accuracy results (comparing to "maximum") but I + // can't + // think of any right now. And I doubt it will make any difference in + // practice. + const LoopBlock LoopBB = getLoopBlock(BB); + auto MaxWeight = getMaxEstimatedEdgeWeight(LoopBB, successors(BB)); + + if (MaxWeight) + propagateEstimatedBlockWeight(LoopBB, DT, PDT, MaxWeight.getValue(), + BlockWorkList, LoopWorkList); + } + } while (!BlockWorkList.empty() || !LoopWorkList.empty()); +} + +// Calculate edge probabilities based on block's estimated weight. +// Note that gathered weights were not scaled for loops. Thus edges entering +// and exiting loops requires special processing. +bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) { + assert(BB->getTerminator()->getNumSuccessors() > 1 && + "expected more than one successor!"); + + const LoopBlock LoopBB = getLoopBlock(BB); + + SmallPtrSet UnlikelyBlocks; + uint32_t TC = LBH_TAKEN_WEIGHT / LBH_NONTAKEN_WEIGHT; + if (LoopBB.getLoop()) + computeUnlikelySuccessors(BB, LoopBB.getLoop(), UnlikelyBlocks); + + // Changed to 'true' if at least one successor has estimated weight. + bool FoundEstimatedWeight = false; + SmallVector SuccWeights; + uint64_t TotalWeight = 0; + // Go over all successors of BB and put their weights into SuccWeights. + for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + const BasicBlock *SuccBB = *I; + Optional Weight; + const LoopBlock SuccLoopBB = getLoopBlock(SuccBB); + const LoopEdge Edge{LoopBB, SuccLoopBB}; + + Weight = getEstimatedEdgeWeight(Edge); + + if (isLoopExitingEdge(Edge) && + // Avoid adjustment of ZERO weight since it should remain unchanged. + Weight != static_cast(BlockExecWeight::ZERO)) { + // Scale down loop exiting weight by trip count. 
+ Weight = std::max( + static_cast(BlockExecWeight::LOWEST_NON_ZERO), + Weight.getValueOr(static_cast(BlockExecWeight::DEFAULT)) / + TC); + } + bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(SuccBB); + if (IsUnlikelyEdge && + // Avoid adjustment of ZERO weight since it should remain unchanged. + Weight != static_cast(BlockExecWeight::ZERO)) { + // 'Unlikely' blocks have their weight halved. + Weight = std::max( + static_cast(BlockExecWeight::LOWEST_NON_ZERO), + Weight.getValueOr(static_cast(BlockExecWeight::DEFAULT)) / + 2); + } + + if (Weight) + FoundEstimatedWeight = true; + + auto WeightVal = + Weight.getValueOr(static_cast(BlockExecWeight::DEFAULT)); + TotalWeight += WeightVal; + SuccWeights.push_back(WeightVal); } - if (uint32_t numUnlikelyEdges = UnlikelyEdges.size()) { - BranchProbability UnlikelyProb = BranchProbability(LBH_UNLIKELY_WEIGHT, - Denom); - auto Prob = UnlikelyProb / numUnlikelyEdges; - for (unsigned SuccIdx : UnlikelyEdges) - EdgeProbabilities[SuccIdx] = Prob; + // If none of the blocks have estimated weight, bail out. + // If TotalWeight is 0 that means weight of each successor is 0 as well and + // equally likely. Bail out early to not deal with division by zero. + if (!FoundEstimatedWeight || TotalWeight == 0) + return false; + + assert(SuccWeights.size() == succ_size(BB) && "Missed successor?"); + const unsigned SuccCount = SuccWeights.size(); + + // If the sum of weights does not fit in 32 bits, scale every weight down + // accordingly. 
+ if (TotalWeight > UINT32_MAX) { + uint64_t ScalingFactor = TotalWeight / UINT32_MAX + 1; + TotalWeight = 0; + for (unsigned Idx = 0; Idx < SuccCount; ++Idx) { + SuccWeights[Idx] /= ScalingFactor; + if (SuccWeights[Idx] == static_cast(BlockExecWeight::ZERO)) + SuccWeights[Idx] = + static_cast(BlockExecWeight::LOWEST_NON_ZERO); + TotalWeight += SuccWeights[Idx]; + } + assert(TotalWeight <= UINT32_MAX && "Total weight overflows"); } + // Finally set probabilities to edges according to estimated block weights. + SmallVector EdgeProbabilities( + SuccCount, BranchProbability::getUnknown()); + + for (unsigned Idx = 0; Idx < SuccCount; ++Idx) { + EdgeProbabilities[Idx] = + BranchProbability(SuccWeights[Idx], (uint32_t)TotalWeight); + } setEdgeProbability(BB, EdgeProbabilities); return true; } @@ -1015,18 +1075,6 @@ return true; } -bool BranchProbabilityInfo::calcInvokeHeuristics(const BasicBlock *BB) { - const InvokeInst *II = dyn_cast(BB->getTerminator()); - if (!II) - return false; - - BranchProbability TakenProb(IH_TAKEN_WEIGHT, - IH_TAKEN_WEIGHT + IH_NONTAKEN_WEIGHT); - setEdgeProbability( - BB, SmallVector({TakenProb, TakenProb.getCompl()})); - return true; -} - void BranchProbabilityInfo::releaseMemory() { Probs.clear(); Handles.clear(); @@ -1202,26 +1250,34 @@ } } -void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, +void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LoopI, const TargetLibraryInfo *TLI, + DominatorTree *DT, PostDominatorTree *PDT) { LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() << " ----\n\n"); LastF = &F; // Store the last function we ran on for printing. 
- assert(PostDominatedByUnreachable.empty()); - assert(PostDominatedByColdCall.empty()); + LI = &LoopI; SccI = std::make_unique(F); + assert(EstimatedBlockWeight.empty()); + assert(EstimatedLoopWeight.empty()); + + std::unique_ptr DTPtr; std::unique_ptr PDTPtr; + if (!DT) { + DTPtr = std::make_unique(const_cast(F)); + DT = DTPtr.get(); + } + if (!PDT) { PDTPtr = std::make_unique(const_cast(F)); PDT = PDTPtr.get(); } - computePostDominatedByUnreachable(F, PDT); - computePostDominatedByColdCall(F, PDT); + computeEestimateBlockWeight(F, DT, PDT); // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. @@ -1233,13 +1289,7 @@ continue; if (calcMetadataWeights(BB)) continue; - if (calcInvokeHeuristics(BB)) - continue; - if (calcUnreachableHeuristics(BB)) - continue; - if (calcColdCallHeuristics(BB)) - continue; - if (calcLoopBranchHeuristics(BB, LI)) + if (calcEstimatedHeuristics(BB)) continue; if (calcPointerHeuristics(BB)) continue; @@ -1249,8 +1299,8 @@ continue; } - PostDominatedByUnreachable.clear(); - PostDominatedByColdCall.clear(); + EstimatedLoopWeight.clear(); + EstimatedBlockWeight.clear(); SccI.reset(); if (PrintBranchProb && @@ -1268,6 +1318,7 @@ AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.setPreservesAll(); } @@ -1276,9 +1327,10 @@ const LoopInfo &LI = getAnalysis().getLoopInfo(); const TargetLibraryInfo &TLI = getAnalysis().getTLI(F); + DominatorTree &DT = getAnalysis().getDomTree(); PostDominatorTree &PDT = getAnalysis().getPostDomTree(); - BPI.calculate(F, LI, &TLI, &PDT); + BPI.calculate(F, LI, &TLI, &DT, &PDT); return false; } @@ -1295,6 +1347,7 @@ BranchProbabilityInfo BPI; BPI.calculate(F, AM.getResult(F), &AM.getResult(F), + &AM.getResult(F), &AM.getResult(F)); return BPI; } diff --git a/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp b/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp --- a/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp 
+++ b/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp @@ -37,7 +37,7 @@ LI.analyze(DT); // Then compute BranchProbabilityInfo. - BranchProbabilityInfo BPI(*F, LI); + BranchProbabilityInfo BPI(*F, LI, nullptr, &DT, nullptr); // Finally compute BFI. OwnedBFI = std::make_unique(*F, BPI, LI); diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp --- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -362,7 +362,7 @@ // For the new PM, we also can't use BranchProbabilityInfo as an analysis // pass. Function analyses need to be preserved across loop transformations // but BPI is not preserved, hence a newly built one is needed. - BranchProbabilityInfo BPI(*F, AR.LI, &AR.TLI); + BranchProbabilityInfo BPI(*F, AR.LI, &AR.TLI, &AR.DT, nullptr); LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI, &BPI); if (!LP.runOnLoop(&L)) return PreservedAnalyses::all(); diff --git a/llvm/test/Analysis/BlockFrequencyInfo/redundant_edges.ll b/llvm/test/Analysis/BlockFrequencyInfo/redundant_edges.ll --- a/llvm/test/Analysis/BlockFrequencyInfo/redundant_edges.ll +++ b/llvm/test/Analysis/BlockFrequencyInfo/redundant_edges.ll @@ -9,7 +9,7 @@ entry: br label %loop -; CHECK-NEXT: loop: float = 32.0 +; CHECK-NEXT: loop: float = 16.5 loop: switch i32 undef, label %loop [ i32 0, label %return diff --git a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll --- a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll @@ -124,8 +124,8 @@ ; CHECK: Printing analysis {{.*}} for function 'test5' entry: br i1 %flag, label %then, label %else -; CHECK: edge entry -> then probability is 0x07878788 / 0x80000000 = 5.88% -; CHECK: edge entry -> else probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge] +; CHECK: edge entry -> then probability is 0x078780e3 / 0x80000000 = 5.88% +; CHECK: edge entry -> 
else probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] then: call void @coldfunc() @@ -145,15 +145,16 @@ entry: %cond1 = icmp eq i32 %a, 42 br i1 %cond1, label %header, label %exit - +; CHECK: edge entry -> header probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> exit probability is 0x40000000 / 0x80000000 = 50.00% header: br label %body body: %cond2 = icmp eq i32 %b, 42 br i1 %cond2, label %header, label %exit -; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% - +; CHECK: edge body -> header probability is 0x7fbe1203 / 0x80000000 = 99.80% [HOT edge] +; CHECK: edge body -> exit probability is 0x0041edfd / 0x80000000 = 0.20% exit: call void @coldfunc() ret i32 %b @@ -165,8 +166,8 @@ ; CHECK: Printing analysis {{.*}} for function 'test_cold_call_sites_with_prof' entry: br i1 %flag, label %then, label %else -; CHECK: edge entry -> then probability is 0x07878788 / 0x80000000 = 5.88% -; CHECK: edge entry -> else probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge] +; CHECK: edge entry -> then probability is 0x078780e3 / 0x80000000 = 5.88% +; CHECK: edge entry -> else probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] then: br i1 %flag2, label %then2, label %else2, !prof !3 @@ -206,8 +207,8 @@ ; after that is fixed. 
; CHECK: Printing analysis {{.*}} for function 'test_cold_call_sites' -; CHECK: edge entry -> then probability is 0x07878788 / 0x80000000 = 5.88% -; CHECK: edge entry -> else probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge] +; CHECK: edge entry -> then probability is 0x078780e3 / 0x80000000 = 5.88% +; CHECK: edge entry -> else probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] entry: %gep1 = getelementptr i32, i32* %a, i32 1 @@ -238,14 +239,14 @@ ; Edge "entry->if.end" should have higher probability based on the cold call ; heuristic which treat %if.then as a cold block because the normal destination ; of the invoke instruction in %if.then is post-dominated by ColdFunc(). -; CHECK: edge entry -> if.then probability is 0x07878788 / 0x80000000 = 5.88% -; CHECK: edge entry -> if.end probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge] +; CHECK: edge entry -> if.then probability is 0x078780e3 / 0x80000000 = 5.88% +; CHECK: edge entry -> if.end probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] if.then: invoke i32 @InvokeCall() to label %invoke.cont unwind label %lpad -; CHECK: edge if.then -> invoke.cont probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge if.then -> lpad probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge if.then -> invoke.cont probability is 0x7fff8000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge if.then -> lpad probability is 0x00008000 / 0x80000000 = 0.00% invoke.cont: call void @ColdFunc() #0 @@ -267,13 +268,12 @@ ; CHECK: edge entry -> if.then probability is 0x40000000 / 0x80000000 = 50.00% ; CHECK: edge entry -> if.end probability is 0x40000000 / 0x80000000 = 50.00% - if.then: invoke i32 @InvokeCall() to label %invoke.cont unwind label %lpad ; The cold call heuristic should not kick in when the cold callsite is in EH path. 
-; CHECK: edge if.then -> invoke.cont probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge if.then -> lpad probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge if.then -> invoke.cont probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge if.then -> lpad probability is 0x00000800 / 0x80000000 = 0.00% invoke.cont: br label %if.end @@ -292,16 +292,16 @@ define i32 @test_invoke_code_callsite3(i1 %c) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: br i1 %c, label %if.then, label %if.end -; CHECK: edge entry -> if.then probability is 0x07878788 / 0x80000000 = 5.88% -; CHECK: edge entry -> if.end probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge] +; CHECK: edge entry -> if.then probability is 0x078780e3 / 0x80000000 = 5.88% +; CHECK: edge entry -> if.end probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] if.then: invoke i32 @InvokeCall() to label %invoke.cont unwind label %lpad ; Regardless of cold calls, edge weights from a invoke instruction should be ; determined by the invoke heuristic. 
-; CHECK: edge if.then -> invoke.cont probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge if.then -> lpad probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge if.then -> invoke.cont probability is 0x7fff8000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge if.then -> lpad probability is 0x00008000 / 0x80000000 = 0.00% invoke.cont: call void @ColdFunc() #0 diff --git a/llvm/test/Analysis/BranchProbabilityInfo/deopt-intrinsic.ll b/llvm/test/Analysis/BranchProbabilityInfo/deopt-intrinsic.ll --- a/llvm/test/Analysis/BranchProbabilityInfo/deopt-intrinsic.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/deopt-intrinsic.ll @@ -9,8 +9,8 @@ %cond = icmp eq i32 %a, 42 br i1 %cond, label %exit, label %deopt -; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge entry -> deopt probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> deopt probability is 0x00000000 / 0x80000000 = 0.00% deopt: %rval = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] diff --git a/llvm/test/Analysis/BranchProbabilityInfo/deopt-invoke.ll b/llvm/test/Analysis/BranchProbabilityInfo/deopt-invoke.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/BranchProbabilityInfo/deopt-invoke.ll @@ -0,0 +1,107 @@ +; RUN: opt -analyze -branch-prob < %s | FileCheck %s +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s + +declare i32* @"personality_function"() #1 +declare void @foo(i32) +declare void @bar() +declare void @llvm.experimental.deoptimize.isVoid(...) +declare void @cold() cold + +; Even though the likeliness of 'invoke' to throw an exception is assessed as low +; all other paths are even less likely. Check that hot paths leads to excepion handler. 
+define void @test1(i32 %0) personality i32* ()* @"personality_function" !prof !1 { +;CHECK: edge entry -> unreached probability is 0x00000001 / 0x80000000 = 0.00% +;CHECK: edge entry -> invoke probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge invoke -> invoke.cont.unreached probability is 0x00000000 / 0x80000000 = 0.00% +;CHECK: edge invoke -> land.pad probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge land.pad -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + br i1 undef, label %unreached, label %invoke, !prof !2 +invoke: + invoke void @foo(i32 %0) + to label %invoke.cont.unreached unwind label %land.pad +invoke.cont.unreached: + call void (...) @llvm.experimental.deoptimize.isVoid(i32 10) [ "deopt"() ] + ret void + +unreached: + unreachable + +land.pad: + %v20 = landingpad { i8*, i32 } + cleanup + %v21 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(256)* inttoptr (i64 8 to i8 addrspace(1)* addrspace(256)*), align 8 + br label %exit + +exit: + call void @bar() + ret void +} + +define void @test2(i32 %0) personality i32* ()* @"personality_function" { +;CHECK: edge entry -> unreached probability is 0x00000000 / 0x80000000 = 0.00% +;CHECK: edge entry -> invoke probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge invoke -> invoke.cont.cold probability is 0x7fff8000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge invoke -> land.pad probability is 0x00008000 / 0x80000000 = 0.00% +;CHECK: edge land.pad -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + br i1 undef, label %unreached, label %invoke +invoke: + invoke void @foo(i32 %0) + to label %invoke.cont.cold unwind label %land.pad +invoke.cont.cold: + call void @cold() + ret void + +unreached: + unreachable + +land.pad: + %v20 = landingpad { i8*, i32 } + cleanup + %v21 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(256)* inttoptr (i64 8 to i8 addrspace(1)* addrspace(256)*), align 8 + 
br label %exit + +exit: + call void @bar() + ret void +} + +define void @test3(i32 %0) personality i32* ()* @"personality_function" { +;CHECK: edge entry -> unreached probability is 0x00000000 / 0x80000000 = 0.00% +;CHECK: edge entry -> invoke probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge invoke -> invoke.cont.cold probability is 0x7fff8000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge invoke -> land.pad probability is 0x00008000 / 0x80000000 = 0.00% +;CHECK: edge land.pad -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +entry: + br i1 undef, label %unreached, label %invoke +invoke: + invoke void @foo(i32 %0) + to label %invoke.cont.cold unwind label %land.pad +invoke.cont.cold: + call void @cold() + ret void + +unreached: + unreachable + +land.pad: + %v20 = landingpad { i8*, i32 } + cleanup + %v21 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(256)* inttoptr (i64 8 to i8 addrspace(1)* addrspace(256)*), align 8 + call void @cold() + br label %exit + +exit: + call void @bar() + ret void +} + + +attributes #1 = { nounwind } + +!1 = !{!"function_entry_count", i64 32768} +!2 = !{!"branch_weights", i32 1, i32 983040} + diff --git a/llvm/test/Analysis/BranchProbabilityInfo/loop.ll b/llvm/test/Analysis/BranchProbabilityInfo/loop.ll --- a/llvm/test/Analysis/BranchProbabilityInfo/loop.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/loop.ll @@ -428,9 +428,8 @@ %inc = add nsw i32 %count.0, 1 %cmp1 = icmp sgt i32 %count.0, 6 br i1 %cmp1, label %if.then, label %for.inc -; CHECK: edge for.body -> if.then probability is 0x2aaaaaab / 0x80000000 = 33.33% -; CHECK: edge for.body -> for.inc probability is 0x55555555 / 0x80000000 = 66.67% - +; CHECK: edge for.body -> if.then probability is 0x2aaaa8e4 / 0x80000000 = 33.33% +; CHECK: edge for.body -> for.inc probability is 0x5555571c / 0x80000000 = 66.67% if.then: store i32 %add, i32* %arrayidx, align 4 br label %for.inc @@ -521,3 +520,207 @@ } declare i32 @InvokeCall() +declare 
void @cold() cold + +; If loop has single exit and it leads to 'cold' block then edge leading to loop enter +; should be considered 'cold' as well. +define void @test13() { +; CHECK: edge entry -> loop probability is 0x078780e3 / 0x80000000 = 5.88% +; CHECK: edge entry -> exit probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] +; CHECK: edge loop -> loop probability is 0x7fbe1203 / 0x80000000 = 99.80% [HOT edge] +; CHECK: edge loop -> cold probability is 0x0041edfd / 0x80000000 = 0.20% +; CHECK: edge cold -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + br i1 undef, label %loop, label %exit + +loop: + %i.0 = phi i32 [ 0, %entry ], [ %inc, %loop ] + %inc = add nsw i32 %i.0, 1 + br i1 undef, label %loop, label %cold + +cold: + call void @cold() + br label %exit + +exit: + ret void +} + +; This is the same case as test13 but with additional loop 'preheader' block. +define void @test14() { +; CHECK: edge entry -> preheader probability is 0x078780e3 / 0x80000000 = 5.88% +; CHECK: edge entry -> exit probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] +; CHECK: edge preheader -> loop probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge loop -> loop probability is 0x7fbe1203 / 0x80000000 = 99.80% [HOT edge] +; CHECK: edge loop -> cold probability is 0x0041edfd / 0x80000000 = 0.20% +; CHECK: edge cold -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + br i1 undef, label %preheader, label %exit + +preheader: + br label %loop + +loop: + %i.0 = phi i32 [ 0, %preheader ], [ %inc, %loop ] + %inc = add nsw i32 %i.0, 1 + br i1 undef, label %loop, label %cold + +cold: + call void @cold() + br label %exit + +exit: + ret void +} + +; If loop has multiple low probability exits then edge leading to loop enter +; should be considered low probable as well. 
+define void @test15() { +; CHECK: edge entry -> loop probability is 0x078780e3 / 0x80000000 = 5.88% +; CHECK: edge entry -> exit probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] +; CHECK: edge loop -> cont probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge loop -> unreached probability is 0x00000000 / 0x80000000 = 0.00% +; CHECK: edge cont -> loop probability is 0x7fbe1203 / 0x80000000 = 99.80% [HOT edge] +; CHECK: edge cont -> cold probability is 0x0041edfd / 0x80000000 = 0.20% +; CHECK: edge cold -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + br i1 undef, label %loop, label %exit + +loop: + %i.0 = phi i32 [ 0, %entry ], [ %inc, %cont ] + %inc = add nsw i32 %i.0, 1 + br i1 undef, label %cont, label %unreached + +cont: + br i1 undef, label %loop, label %cold + +unreached: + unreachable + + +cold: + call void @cold() + br label %exit + +exit: + ret void +} + +; This is the same case as test15 but with additional loop 'preheader' block. 
+define void @test16() { +; CHECK: edge entry -> preheader probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge preheader -> loop probability is 0x078780e3 / 0x80000000 = 5.88% +; CHECK: edge preheader -> exit probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] +; CHECK: edge loop -> cont probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge loop -> unreached probability is 0x00000000 / 0x80000000 = 0.00% +; CHECK: edge cont -> loop probability is 0x7fbe1203 / 0x80000000 = 99.80% [HOT edge] +; CHECK: edge cont -> cold probability is 0x0041edfd / 0x80000000 = 0.20% +; CHECK: edge cold -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + br label %preheader + +preheader: + br i1 undef, label %loop, label %exit + +loop: + %i.0 = phi i32 [ 0, %preheader ], [ %inc, %cont ] + %inc = add nsw i32 %i.0, 1 + br i1 undef, label %cont, label %unreached + +cont: + br i1 undef, label %loop, label %cold + +unreached: + unreachable + + +cold: + call void @cold() + br label %exit + +exit: + ret void +} + +declare void @abort() noreturn + +; Check that 'preheader' has 50/50 probability since there is one 'normal' exit. +; Check that exit to 'cold' and 'noreturn' has lower probability than 'normal' exit. 
+define void @test17() { +; CHECK: edge entry -> preheader probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge preheader -> loop probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge preheader -> exit probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge loop -> cont probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge loop -> noreturn probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge cont -> cont2 probability is 0x7fbe1203 / 0x80000000 = 99.80% [HOT edge] +; CHECK: edge cont -> cold probability is 0x0041edfd / 0x80000000 = 0.20% +; CHECK: edge cont2 -> loop probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge] +; CHECK: edge cont2 -> exit probability is 0x04000000 / 0x80000000 = 3.12% +; CHECK: edge cold -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +entry: + br label %preheader + +preheader: + br i1 undef, label %loop, label %exit + +loop: + %i.0 = phi i32 [ 0, %preheader ], [ %inc, %cont2 ] + %inc = add nsw i32 %i.0, 1 + br i1 undef, label %cont, label %noreturn + +cont: + br i1 undef, label %cont2, label %cold + +cont2: + br i1 undef, label %loop, label %exit + +noreturn: + call void @abort() + unreachable + +cold: + call void @cold() + br label %exit + +exit: + ret void +} + + +; This is a case with two loops where one is nested inside another. The nested +; loop has a low-probability exit which increases the probability of taking the +; exit in the top-level loop. 
+define void @test18() { +; CHECK: edge entry -> top.loop probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge top.loop -> loop probability is 0x546cd4b7 / 0x80000000 = 65.96% +; CHECK: edge top.loop -> exit probability is 0x2b932b49 / 0x80000000 = 34.04% +; CHECK: edge loop -> loop probability is 0x7fbe1203 / 0x80000000 = 99.80% [HOT edge] +; CHECK: edge loop -> cold probability is 0x0041edfd / 0x80000000 = 0.20% +; CHECK: edge cold -> top.loop probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + br label %top.loop + +top.loop: + %j.0 = phi i32 [ 0, %entry ], [ %j.inc, %cold ] + br i1 undef, label %loop, label %exit + +loop: + %i.0 = phi i32 [ %j.0, %top.loop ], [ %inc, %loop ] + %inc = add nsw i32 %i.0, 1 + br i1 undef, label %loop, label %cold + +cold: + call void @cold() + %j.inc = add nsw i32 %j.0, 1 + br label %top.loop + +exit: + ret void +} + + + diff --git a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll --- a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll @@ -9,8 +9,8 @@ entry: %cond = icmp eq i32 %a, 42 br i1 %cond, label %exit, label %abort -; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge entry -> abort probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> exit probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> abort probability is 0x00000800 / 0x80000000 = 0.00% abort: call void @abort() noreturn @@ -27,11 +27,11 @@ i32 2, label %case_b i32 3, label %case_c i32 4, label %case_d] -; CHECK: edge entry -> exit probability is 0x7ffffffc / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge entry -> case_a probability is 0x00000001 / 0x80000000 = 0.00% -; CHECK: edge entry -> case_b probability is 0x00000001 / 0x80000000 = 0.00% -; CHECK: edge entry -> case_c probability is 0x00000001 / 
0x80000000 = 0.00% -; CHECK: edge entry -> case_d probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> exit probability is 0x7fffe000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> case_a probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge entry -> case_b probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge entry -> case_c probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge entry -> case_d probability is 0x00000800 / 0x80000000 = 0.00% case_a: br label %case_b @@ -56,8 +56,8 @@ entry: %cond1 = icmp eq i32 %a, 42 br i1 %cond1, label %exit, label %dom -; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge entry -> dom probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> exit probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> dom probability is 0x00000800 / 0x80000000 = 0.00% dom: %cond2 = icmp ult i32 %a, 42 @@ -85,8 +85,8 @@ entry: %cond1 = icmp eq i32 %a, 42 br i1 %cond1, label %header, label %exit -; CHECK: edge entry -> header probability is 0x00000001 / 0x80000000 = 0.00% -; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> header probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge entry -> exit probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] header: br label %body @@ -94,9 +94,8 @@ body: %cond2 = icmp eq i32 %a, 42 br i1 %cond2, label %header, label %abort -; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% -; CHECK: edge body -> abort probability is 0x40000000 / 0x80000000 = 50.00% - +; CHECK: edge body -> header probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge body -> abort probability is 0x00000800 / 0x80000000 = 0.00% abort: call void @abort() noreturn unreachable @@ -113,15 +112,15 @@ entry: %cmp = icmp sge i32 %idx, %limit br i1 %cmp, label %if.then, label %if.end -; 
CHECK: edge entry -> if.then probability is 0x00000001 / 0x80000000 = 0.00% -; CHECK: edge entry -> if.end probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> if.then probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge entry -> if.end probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] if.then: ; preds = %entry %exception = call i8* @__cxa_allocate_exception(i64 1) #0 invoke i32 @smallFunction(i32 %idx) to label %invoke.cont unwind label %lpad -; CHECK: edge if.then -> invoke.cont probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge if.then -> lpad probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge if.then -> invoke.cont probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge if.then -> lpad probability is 0x40000000 / 0x80000000 = 50.00% invoke.cont: ; preds = %if.then call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #1 diff --git a/llvm/test/Analysis/BranchProbabilityInfo/unreachable.ll b/llvm/test/Analysis/BranchProbabilityInfo/unreachable.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/BranchProbabilityInfo/unreachable.ll @@ -0,0 +1,154 @@ +; RUN: opt -analyze -branch-prob < %s | FileCheck %s +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s + +declare void @bar() cold + +; Both 'l1' and 'r1' has one edge leading to 'cold' and another one to +; 'unreachable' blocks. Check that 'cold' paths are preferred. Also ensure both +; paths from 'entry' block are equal. 
+define void @test1(i32 %0) { +;CHECK: edge entry -> l1 probability is 0x40000000 / 0x80000000 = 50.00% +;CHECK: edge entry -> r1 probability is 0x40000000 / 0x80000000 = 50.00% +;CHECK: edge l1 -> cold probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge l1 -> unreached probability is 0x00000000 / 0x80000000 = 0.00% +;CHECK: edge r1 -> unreached probability is 0x00000000 / 0x80000000 = 0.00% +;CHECK: edge r1 -> cold probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + br i1 undef, label %l1, label %r1 + +l1: + br i1 undef, label %cold, label %unreached + +r1: + br i1 undef, label %unreached, label %cold + +unreached: + unreachable + +cold: + call void @bar() + ret void +} + +; Both edges of 'l1' lead to 'cold' blocks while one edge of 'r1' leads to an +; 'unreachable' block. Check that 'l1' has a 50/50 while 'r1' has a 0/100 +; distribution. Also ensure both paths from 'entry' block are equal. +define void @test2(i32 %0) { +;CHECK: edge entry -> l1 probability is 0x40000000 / 0x80000000 = 50.00% +;CHECK: edge entry -> r1 probability is 0x40000000 / 0x80000000 = 50.00% +;CHECK: edge l1 -> cold probability is 0x40000000 / 0x80000000 = 50.00% +;CHECK: edge l1 -> cold2 probability is 0x40000000 / 0x80000000 = 50.00% +;CHECK: edge r1 -> unreached probability is 0x00000000 / 0x80000000 = 0.00% +;CHECK: edge r1 -> cold probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + br i1 undef, label %l1, label %r1 + +l1: + br i1 undef, label %cold, label %cold2 + +r1: + br i1 undef, label %unreached, label %cold + +unreached: + unreachable + +cold: + call void @bar() + ret void + +cold2: + call void @bar() + ret void +} + +; Both edges of 'r1' lead to 'unreachable' blocks while one edge of 'l1' leads to +; a 'cold' block. Ensure that the path leading to the 'cold' block is preferred. 
+define void @test3(i32 %0) { +;CHECK: edge entry -> l1 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge entry -> r1 probability is 0x00000000 / 0x80000000 = 0.00% +;CHECK: edge l1 -> cold probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge l1 -> unreached probability is 0x00000000 / 0x80000000 = 0.00% +;CHECK: edge r1 -> unreached probability is 0x40000000 / 0x80000000 = 50.00% +;CHECK: edge r1 -> unreached2 probability is 0x40000000 / 0x80000000 = 50.00% + +entry: + br i1 undef, label %l1, label %r1 + +l1: + br i1 undef, label %cold, label %unreached + +r1: + br i1 undef, label %unreached, label %unreached2 + +unreached: + unreachable + +unreached2: + unreachable + +cold: + call void @bar() + ret void +} + +; Left edge of 'entry' leads to 'cold' block while right edge is 'normal' continuation. +; Check that we able to propagate 'cold' weight to 'entry' block. Also ensure +; both edges from 'l1' are equally likely. +define void @test4(i32 %0) { +;CHECK: edge entry -> l1 probability is 0x078780e3 / 0x80000000 = 5.88% +;CHECK: edge entry -> r1 probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] +;CHECK: edge l1 -> l2 probability is 0x40000000 / 0x80000000 = 50.00% +;CHECK: edge l1 -> r2 probability is 0x40000000 / 0x80000000 = 50.00% +;CHECK: edge l2 -> to.cold probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge r2 -> to.cold probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge to.cold -> cold probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + br i1 undef, label %l1, label %r1 + +l1: + br i1 undef, label %l2, label %r2 + +l2: + br label %to.cold + +r2: + br label %to.cold + +to.cold: + br label %cold + +r1: + ret void + +cold: + call void @bar() + ret void +} + +; Check that most likely path from 'entry' to 'l2' through 'r1' is preferred. 
+define void @test5(i32 %0) { +;CHECK: edge entry -> cold probability is 0x078780e3 / 0x80000000 = 5.88% +;CHECK: edge entry -> r1 probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] +;CHECK: edge cold -> l2 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge r1 -> l2 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge r1 -> unreached probability is 0x00000000 / 0x80000000 = 0.00% + +entry: + br i1 undef, label %cold, label %r1 + +cold: + call void @bar() + br label %l2 + +r1: + br i1 undef, label %l2, label %unreached + +l2: + ret void + +unreached: + unreachable +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-invoke-probabilities.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-invoke-probabilities.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-invoke-probabilities.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-invoke-probabilities.ll @@ -11,7 +11,7 @@ define void @pluto() align 2 personality i8* bitcast (i32 (...)* @hoge to i8*) { ; CHECK-LABEL: @pluto ; CHECK: bb.1.bb -; CHECK: successors: %bb.2(0x7ffff800), %bb.3(0x00000800) +; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; CHECK: EH_LABEL ; CHECK: G_BR %bb.2 diff --git a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll --- a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll +++ b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll @@ -14,7 +14,7 @@ define amdgpu_ps float @test_return_to_epilog_into_end_block(i32 inreg %a, float %b) #0 { ; GCN-LABEL: name: test_return_to_epilog_into_end_block ; GCN: bb.0.entry: - ; GCN: successors: %bb.1(0x7fffffff), %bb.2(0x00000001) + ; GCN: successors: %bb.1(0x80000000), %bb.2(0x00000000) ; GCN: liveins: $sgpr2, $vgpr0 ; GCN: S_CMP_LT_I32 killed renamable $sgpr2, 1, implicit-def $scc ; GCN: S_CBRANCH_SCC1 %bb.2, implicit killed $scc @@ -49,7 +49,7 @@ ; 
GCN: liveins: $vgpr0 ; GCN: S_BRANCH %bb.5 ; GCN: bb.2.else.if.cond: - ; GCN: successors: %bb.3(0x7fffffff), %bb.4(0x00000001) + ; GCN: successors: %bb.3(0x80000000), %bb.4(0x00000000) ; GCN: liveins: $sgpr3, $vgpr1 ; GCN: S_CMP_LT_I32 killed renamable $sgpr3, 1, implicit-def $scc ; GCN: S_CBRANCH_SCC1 %bb.4, implicit killed $scc diff --git a/llvm/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll b/llvm/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll --- a/llvm/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll +++ b/llvm/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll @@ -22,7 +22,7 @@ ; for.body -> for.cond.backedge (100%) ; -> cond.false.i (0%) ; CHECK: bb.1.for.body: -; CHECK: successors: %bb.2(0x80000000), %bb.4(0x00000001) +; CHECK: successors: %bb.2(0x80000000), %bb.4(0x00000000) for.body: br i1 undef, label %for.cond.backedge, label %lor.lhs.false.i, !prof !1 diff --git a/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll b/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll --- a/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll +++ b/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll @@ -168,7 +168,7 @@ ; CHECK-LABEL: cmp_slt0 ; CHECK: sub ; CHECK: cmn -; CHECK: bgt +; CHECK: ble %load = load i32, i32* @t, align 4 %sub = sub i32 %load, 17 %cmp = icmp slt i32 %sub, 0 diff --git a/llvm/test/CodeGen/ARM/v8m.base-jumptable_alignment.ll b/llvm/test/CodeGen/ARM/v8m.base-jumptable_alignment.ll --- a/llvm/test/CodeGen/ARM/v8m.base-jumptable_alignment.ll +++ b/llvm/test/CodeGen/ARM/v8m.base-jumptable_alignment.ll @@ -23,7 +23,7 @@ ; CHECK-NEXT: bne .LBB0_8 ; CHECK-NEXT: .LBB0_2: @ %for.cond14.preheader.us.i.i.i ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cbnz r0, .LBB0_7 +; CHECK-NEXT: cbnz r0, .LBB0_6 ; CHECK-NEXT: @ %bb.3: @ %for.cond14.preheader.us.i.i.i ; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: lsls r1, r0, #2 @@ -34,22 +34,22 @@ ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: .LJTI0_0: ; CHECK-NEXT: b.w .LBB0_5 -; CHECK-NEXT: b.w .LBB0_7 ; CHECK-NEXT: b.w .LBB0_6 -; CHECK-NEXT: b.w 
.LBB0_8 -; CHECK-NEXT: b.w .LBB0_7 -; CHECK-NEXT: b.w .LBB0_7 -; CHECK-NEXT: b.w .LBB0_7 -; CHECK-NEXT: b.w .LBB0_7 -; CHECK-NEXT: b.w .LBB0_7 -; CHECK-NEXT: b.w .LBB0_7 ; CHECK-NEXT: b.w .LBB0_7 +; CHECK-NEXT: b.w .LBB0_8 +; CHECK-NEXT: b.w .LBB0_6 +; CHECK-NEXT: b.w .LBB0_6 +; CHECK-NEXT: b.w .LBB0_6 +; CHECK-NEXT: b.w .LBB0_6 +; CHECK-NEXT: b.w .LBB0_6 +; CHECK-NEXT: b.w .LBB0_6 +; CHECK-NEXT: b.w .LBB0_6 ; CHECK-NEXT: b.w .LBB0_5 ; CHECK-NEXT: .LBB0_5: @ %for.cond14.preheader.us.i.i.i ; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: b .LBB0_2 -; CHECK-NEXT: .LBB0_6: @ %lbl_1394.i.i.i.loopexit -; CHECK-NEXT: .LBB0_7: @ %func_1.exit.loopexit +; CHECK-NEXT: .LBB0_6: @ %func_1.exit.loopexit +; CHECK-NEXT: .LBB0_7: @ %lbl_1394.i.i.i.loopexit ; CHECK-NEXT: .LBB0_8: @ %for.end476.i.i.i.loopexit entry: %0 = load volatile i32**, i32*** @g_566, align 4 diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll --- a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll +++ b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll @@ -23,8 +23,8 @@ ; CHECK-NEXT: mfcr r12 ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stw r12, 8(r1) -; CHECK-NEXT: stdu r1, -80(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: stdu r1, -64(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: .cfi_offset r29, -24 ; CHECK-NEXT: .cfi_offset r30, -16 @@ -32,32 +32,31 @@ ; CHECK-NEXT: .cfi_offset cr3, 8 ; CHECK-NEXT: .cfi_offset cr4, 8 ; CHECK-NEXT: lwz r3, 0(r3) -; CHECK-NEXT: std r29, 56(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, 64(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, 40(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, 48(r1) # 8-byte Folded Spill ; CHECK-NEXT: paddi r29, 0, .LJTI0_0@PCREL, 1 ; CHECK-NEXT: srwi r4, r3, 4 ; CHECK-NEXT: srwi r3, r3, 5 ; CHECK-NEXT: andi. r4, r4, 1 ; CHECK-NEXT: li r4, 0 -; CHECK-NEXT: crmove 4*cr4+lt, gt +; CHECK-NEXT: crmove 4*cr2+gt, gt ; CHECK-NEXT: andi. 
r3, r3, 1 -; CHECK-NEXT: setnbc r3, gt -; CHECK-NEXT: stw r3, 52(r1) +; CHECK-NEXT: crmove 4*cr2+lt, gt ; CHECK-NEXT: cmplwi cr3, r3, 336 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: sldi r30, r3, 2 ; CHECK-NEXT: b .LBB0_2 -; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: # %bb43 ; CHECK-NEXT: # ; CHECK-NEXT: bl call_1@notoc ; CHECK-NEXT: li r4, 0 -; CHECK-NEXT: setnbc r3, 4*cr2+eq +; CHECK-NEXT: setnbc r3, 4*cr4+eq ; CHECK-NEXT: stb r4, 0(r3) ; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: # %bb5 ; CHECK-NEXT: # -; CHECK-NEXT: bc 12, 4*cr4+lt, .LBB0_31 +; CHECK-NEXT: bc 12, 4*cr2+gt, .LBB0_31 ; CHECK-NEXT: # %bb.3: # %bb10 ; CHECK-NEXT: # ; CHECK-NEXT: bgt cr3, .LBB0_5 @@ -66,7 +65,7 @@ ; CHECK-NEXT: mr r3, r4 ; CHECK-NEXT: lwz r5, 0(r3) ; CHECK-NEXT: rlwinm r4, r5, 0, 21, 22 -; CHECK-NEXT: cmpwi cr2, r4, 512 +; CHECK-NEXT: cmpwi cr4, r4, 512 ; CHECK-NEXT: lwax r4, r30, r29 ; CHECK-NEXT: add r4, r4, r29 ; CHECK-NEXT: mtctr r4 @@ -105,11 +104,11 @@ ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_12 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_13: # %bb47 +; CHECK-NEXT: .LBB0_13: # %bb61 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_13 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_14: # %bb58 +; CHECK-NEXT: .LBB0_14: # %bb47 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_14 ; CHECK-NEXT: .p2align 4 @@ -121,51 +120,51 @@ ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_16 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_17: # %bb23 +; CHECK-NEXT: .LBB0_17: # %bb59 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_17 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_18: # %bb60 +; CHECK-NEXT: .LBB0_18: # %bb46 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_18 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_19: # %bb59 +; CHECK-NEXT: .LBB0_19: # %bb49 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_19 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_20: # %bb46 +; CHECK-NEXT: .LBB0_20: # %bb57 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_20 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_21: # %bb49 +; CHECK-NEXT: .LBB0_21: # %bb18 ; 
CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_21 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_22: # %bb57 +; CHECK-NEXT: .LBB0_22: # %bb58 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_22 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_23: # %bb56 +; CHECK-NEXT: .LBB0_23: # %bb23 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_23 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_24: # %bb20 +; CHECK-NEXT: .LBB0_24: # %bb60 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_24 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_25: # %bb18 +; CHECK-NEXT: .LBB0_25: # %bb55 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_25 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_26: # %bb61 +; CHECK-NEXT: .LBB0_26: # %bb62 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_26 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_27: # %bb55 +; CHECK-NEXT: .LBB0_27: # %bb56 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_27 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_28: # %bb62 +; CHECK-NEXT: .LBB0_28: # %bb20 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_28 ; CHECK-NEXT: .p2align 4 @@ -177,9 +176,9 @@ ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_30 ; CHECK-NEXT: .LBB0_31: # %bb9 -; CHECK-NEXT: ld r30, 64(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, 56(r1) # 8-byte Folded Reload -; CHECK-NEXT: addi r1, r1, 80 +; CHECK-NEXT: ld r30, 48(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 40(r1) # 8-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 64 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: lwz r12, 8(r1) ; CHECK-NEXT: mtlr r0 @@ -188,29 +187,26 @@ ; CHECK-NEXT: mtocrf 8, r12 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB0_32: # %bb29 -; CHECK-NEXT: lwz r4, 52(r1) -; CHECK-NEXT: cmpwi cr4, r3, 0 -; CHECK-NEXT: setnbc r30, 4*cr2+eq -; CHECK-NEXT: # implicit-def: $cr2lt -; CHECK-NEXT: mfocrf r3, 32 +; CHECK-NEXT: mcrf cr0, cr4 ; CHECK-NEXT: cmpwi cr3, r5, 366 +; CHECK-NEXT: cmpwi cr4, r3, 0 ; CHECK-NEXT: li r29, 0 -; CHECK-NEXT: rlwimi r3, r4, 24, 8, 8 -; CHECK-NEXT: mtocrf 32, r3 +; CHECK-NEXT: setnbc r30, eq +; CHECK-NEXT: bc 12, 4*cr2+lt, .LBB0_36 ; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: 
.LBB0_33: # %bb32 -; CHECK-NEXT: # -; CHECK-NEXT: bc 4, 4*cr2+lt, .LBB0_35 -; CHECK-NEXT: # %bb.34: # %bb33 -; CHECK-NEXT: # -; CHECK-NEXT: stb r29, 0(r30) -; CHECK-NEXT: .LBB0_35: # %bb36 -; CHECK-NEXT: # -; CHECK-NEXT: bc 4, 4*cr4+eq, .LBB0_33 -; CHECK-NEXT: # %bb.36: # %bb39 -; CHECK-NEXT: # +; CHECK-NEXT: .LBB0_33: # %bb36 +; CHECK-NEXT: bc 12, 4*cr4+eq, .LBB0_35 +; CHECK-NEXT: .LBB0_34: # %bb32 +; CHECK-NEXT: bc 4, 4*cr2+lt, .LBB0_33 +; CHECK-NEXT: b .LBB0_36 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB0_35: # %bb39 ; CHECK-NEXT: bl call_2@notoc -; CHECK-NEXT: b .LBB0_33 +; CHECK-NEXT: bc 4, 4*cr2+lt, .LBB0_33 +; CHECK-NEXT: .LBB0_36: # %bb33 +; CHECK-NEXT: stb r29, 0(r30) +; CHECK-NEXT: bc 4, 4*cr4+eq, .LBB0_34 +; CHECK-NEXT: b .LBB0_35 ; ; CHECK-BE-LABEL: P10_Spill_CR_GT: ; CHECK-BE: # %bb.0: # %bb @@ -218,8 +214,8 @@ ; CHECK-BE-NEXT: mfcr r12 ; CHECK-BE-NEXT: std r0, 16(r1) ; CHECK-BE-NEXT: stw r12, 8(r1) -; CHECK-BE-NEXT: stdu r1, -160(r1) -; CHECK-BE-NEXT: .cfi_def_cfa_offset 160 +; CHECK-BE-NEXT: stdu r1, -144(r1) +; CHECK-BE-NEXT: .cfi_def_cfa_offset 144 ; CHECK-BE-NEXT: .cfi_offset lr, 16 ; CHECK-BE-NEXT: .cfi_offset r29, -24 ; CHECK-BE-NEXT: .cfi_offset r30, -16 @@ -227,34 +223,33 @@ ; CHECK-BE-NEXT: .cfi_offset cr2, 8 ; CHECK-BE-NEXT: .cfi_offset cr2, 8 ; CHECK-BE-NEXT: lwz r3, 0(r3) -; CHECK-BE-NEXT: std r29, 136(r1) # 8-byte Folded Spill -; CHECK-BE-NEXT: std r30, 144(r1) # 8-byte Folded Spill +; CHECK-BE-NEXT: std r29, 120(r1) # 8-byte Folded Spill +; CHECK-BE-NEXT: std r30, 128(r1) # 8-byte Folded Spill ; CHECK-BE-NEXT: srwi r4, r3, 4 ; CHECK-BE-NEXT: srwi r3, r3, 5 ; CHECK-BE-NEXT: andi. r4, r4, 1 ; CHECK-BE-NEXT: li r4, 0 -; CHECK-BE-NEXT: crmove 4*cr4+lt, gt +; CHECK-BE-NEXT: crmove 4*cr2+gt, gt ; CHECK-BE-NEXT: andi. 
r3, r3, 1 -; CHECK-BE-NEXT: setnbc r3, gt -; CHECK-BE-NEXT: stw r3, 132(r1) +; CHECK-BE-NEXT: crmove 4*cr2+lt, gt ; CHECK-BE-NEXT: cmplwi cr3, r3, 336 ; CHECK-BE-NEXT: li r3, 0 ; CHECK-BE-NEXT: sldi r30, r3, 2 ; CHECK-BE-NEXT: addis r3, r2, .LC0@toc@ha ; CHECK-BE-NEXT: ld r29, .LC0@toc@l(r3) ; CHECK-BE-NEXT: b .LBB0_2 -; CHECK-BE-NEXT: .p2align 4 ; CHECK-BE-NEXT: .LBB0_1: # %bb43 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: bl call_1 ; CHECK-BE-NEXT: nop ; CHECK-BE-NEXT: li r4, 0 -; CHECK-BE-NEXT: setnbc r3, 4*cr2+eq +; CHECK-BE-NEXT: setnbc r3, 4*cr4+eq ; CHECK-BE-NEXT: stb r4, 0(r3) ; CHECK-BE-NEXT: li r4, 0 +; CHECK-BE-NEXT: .p2align 4 ; CHECK-BE-NEXT: .LBB0_2: # %bb5 ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: bc 12, 4*cr4+lt, .LBB0_31 +; CHECK-BE-NEXT: bc 12, 4*cr2+gt, .LBB0_31 ; CHECK-BE-NEXT: # %bb.3: # %bb10 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: bgt cr3, .LBB0_5 @@ -263,7 +258,7 @@ ; CHECK-BE-NEXT: mr r3, r4 ; CHECK-BE-NEXT: lwz r5, 0(r3) ; CHECK-BE-NEXT: rlwinm r4, r5, 0, 21, 22 -; CHECK-BE-NEXT: cmpwi cr2, r4, 512 +; CHECK-BE-NEXT: cmpwi cr4, r4, 512 ; CHECK-BE-NEXT: lwax r4, r30, r29 ; CHECK-BE-NEXT: add r4, r4, r29 ; CHECK-BE-NEXT: mtctr r4 @@ -302,11 +297,11 @@ ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_12 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_13: # %bb47 +; CHECK-BE-NEXT: .LBB0_13: # %bb61 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_13 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_14: # %bb58 +; CHECK-BE-NEXT: .LBB0_14: # %bb47 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_14 ; CHECK-BE-NEXT: .p2align 4 @@ -318,51 +313,51 @@ ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_16 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_17: # %bb23 +; CHECK-BE-NEXT: .LBB0_17: # %bb59 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_17 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_18: # %bb60 +; CHECK-BE-NEXT: .LBB0_18: # %bb46 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_18 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_19: # %bb59 +; CHECK-BE-NEXT: .LBB0_19: 
# %bb49 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_19 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_20: # %bb46 +; CHECK-BE-NEXT: .LBB0_20: # %bb57 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_20 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_21: # %bb49 +; CHECK-BE-NEXT: .LBB0_21: # %bb18 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_21 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_22: # %bb57 +; CHECK-BE-NEXT: .LBB0_22: # %bb58 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_22 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_23: # %bb56 +; CHECK-BE-NEXT: .LBB0_23: # %bb23 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_23 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_24: # %bb20 +; CHECK-BE-NEXT: .LBB0_24: # %bb60 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_24 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_25: # %bb18 +; CHECK-BE-NEXT: .LBB0_25: # %bb55 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_25 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_26: # %bb61 +; CHECK-BE-NEXT: .LBB0_26: # %bb62 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_26 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_27: # %bb55 +; CHECK-BE-NEXT: .LBB0_27: # %bb56 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_27 ; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: .LBB0_28: # %bb62 +; CHECK-BE-NEXT: .LBB0_28: # %bb20 ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_28 ; CHECK-BE-NEXT: .p2align 4 @@ -374,9 +369,9 @@ ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: b .LBB0_30 ; CHECK-BE-NEXT: .LBB0_31: # %bb9 -; CHECK-BE-NEXT: ld r30, 144(r1) # 8-byte Folded Reload -; CHECK-BE-NEXT: ld r29, 136(r1) # 8-byte Folded Reload -; CHECK-BE-NEXT: addi r1, r1, 160 +; CHECK-BE-NEXT: ld r30, 128(r1) # 8-byte Folded Reload +; CHECK-BE-NEXT: ld r29, 120(r1) # 8-byte Folded Reload +; CHECK-BE-NEXT: addi r1, r1, 144 ; CHECK-BE-NEXT: ld r0, 16(r1) ; CHECK-BE-NEXT: lwz r12, 8(r1) ; CHECK-BE-NEXT: mtlr r0 @@ -385,30 +380,27 @@ ; CHECK-BE-NEXT: mtocrf 8, r12 ; CHECK-BE-NEXT: blr ; CHECK-BE-NEXT: .LBB0_32: # 
%bb29 -; CHECK-BE-NEXT: lwz r4, 132(r1) -; CHECK-BE-NEXT: cmpwi cr4, r3, 0 -; CHECK-BE-NEXT: setnbc r30, 4*cr2+eq -; CHECK-BE-NEXT: # implicit-def: $cr2lt -; CHECK-BE-NEXT: mfocrf r3, 32 +; CHECK-BE-NEXT: mcrf cr0, cr4 ; CHECK-BE-NEXT: cmpwi cr3, r5, 366 +; CHECK-BE-NEXT: cmpwi cr4, r3, 0 ; CHECK-BE-NEXT: li r29, 0 -; CHECK-BE-NEXT: rlwimi r3, r4, 24, 8, 8 -; CHECK-BE-NEXT: mtocrf 32, r3 -; CHECK-BE-NEXT: .p2align 5 -; CHECK-BE-NEXT: .LBB0_33: # %bb32 -; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: bc 4, 4*cr2+lt, .LBB0_35 -; CHECK-BE-NEXT: # %bb.34: # %bb33 -; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stb r29, 0(r30) -; CHECK-BE-NEXT: .LBB0_35: # %bb36 -; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: bc 4, 4*cr4+eq, .LBB0_33 -; CHECK-BE-NEXT: # %bb.36: # %bb39 -; CHECK-BE-NEXT: # +; CHECK-BE-NEXT: setnbc r30, eq +; CHECK-BE-NEXT: bc 12, 4*cr2+lt, .LBB0_36 +; CHECK-BE-NEXT: .p2align 4 +; CHECK-BE-NEXT: .LBB0_33: # %bb36 +; CHECK-BE-NEXT: bc 12, 4*cr4+eq, .LBB0_35 +; CHECK-BE-NEXT: .LBB0_34: # %bb32 +; CHECK-BE-NEXT: bc 4, 4*cr2+lt, .LBB0_33 +; CHECK-BE-NEXT: b .LBB0_36 +; CHECK-BE-NEXT: .p2align 4 +; CHECK-BE-NEXT: .LBB0_35: # %bb39 ; CHECK-BE-NEXT: bl call_2 ; CHECK-BE-NEXT: nop -; CHECK-BE-NEXT: b .LBB0_33 +; CHECK-BE-NEXT: bc 4, 4*cr2+lt, .LBB0_33 +; CHECK-BE-NEXT: .LBB0_36: # %bb33 +; CHECK-BE-NEXT: stb r29, 0(r30) +; CHECK-BE-NEXT: bc 4, 4*cr4+eq, .LBB0_34 +; CHECK-BE-NEXT: b .LBB0_35 bb: %tmp = load i32, i32* undef, align 8 %tmp1 = and i32 %tmp, 16 diff --git a/llvm/test/CodeGen/PowerPC/pr36292.ll b/llvm/test/CodeGen/PowerPC/pr36292.ll --- a/llvm/test/CodeGen/PowerPC/pr36292.ll +++ b/llvm/test/CodeGen/PowerPC/pr36292.ll @@ -15,7 +15,8 @@ ; CHECK-NEXT: ld 29, 0(3) ; CHECK-NEXT: ld 30, 32(1) ; CHECK-NEXT: cmpld 30, 29 -; CHECK-NEXT: bge 0, .LBB0_2 +; CHECK-NEXT: bge- 0, .LBB0_2 +; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_1: # %bounds.ok ; CHECK-NEXT: # ; CHECK-NEXT: lfsx 2, 0, 3 @@ -25,7 +26,7 @@ ; CHECK-NEXT: addi 30, 30, 1 ; CHECK-NEXT: stfsx 1, 0, 3 ; CHECK-NEXT: cmpld 30, 29 -; 
CHECK-NEXT: blt 0, .LBB0_1 +; CHECK-NEXT: blt+ 0, .LBB0_1 ; CHECK-NEXT: .LBB0_2: # %bounds.fail ; CHECK-NEXT: std 30, 32(1) %pos = alloca i64, align 8 diff --git a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll --- a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll +++ b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll @@ -44,6 +44,7 @@ ; CHECK-NEXT: addi 8, 8, -1 ; CHECK-NEXT: lbz 5, 0(5) ; CHECK-NEXT: bdz .LBB0_4 +; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_3: ; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: clrldi 10, 8, 32 diff --git a/llvm/test/CodeGen/SPARC/missinglabel.ll b/llvm/test/CodeGen/SPARC/missinglabel.ll --- a/llvm/test/CodeGen/SPARC/missinglabel.ll +++ b/llvm/test/CodeGen/SPARC/missinglabel.ll @@ -13,13 +13,13 @@ ; CHECK-NEXT: nop ; CHECK-NEXT: ba .LBB0_1 ; CHECK-NEXT: nop +; CHECK-NEXT: .LBB0_1: ! %cond.false ; CHECK-NEXT: .LBB0_2: ! %targetblock ; CHECK-NEXT: mov %g0, %o0 ; CHECK-NEXT: cmp %o0, 0 ; CHECK-NEXT: bne .LBB0_4 ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.3: ! %cond.false.i83 -; CHECK-NEXT: .LBB0_1: ! %cond.false ; CHECK-NEXT: .LBB0_4: ! 
%exit.i85 entry: %cmp = icmp eq i64 %a0, 0 diff --git a/llvm/test/CodeGen/SystemZ/debuginstr-cgp.mir b/llvm/test/CodeGen/SystemZ/debuginstr-cgp.mir --- a/llvm/test/CodeGen/SystemZ/debuginstr-cgp.mir +++ b/llvm/test/CodeGen/SystemZ/debuginstr-cgp.mir @@ -4,9 +4,9 @@ # RUN: llc %s -mtriple=s390x-linux-gnu -mcpu=z13 -start-before=codegenprepare \ # RUN: -stop-after codegenprepare -o - | FileCheck %s # -# CHECK-LABEL: bb2: +# CHECK-LABEL: bb1: # CHECK: ret -# CHECK-LABEL: bb4: +# CHECK-LABEL: bb2: # CHECK: ret diff --git a/llvm/test/CodeGen/WebAssembly/switch-unreachable-default.ll b/llvm/test/CodeGen/WebAssembly/switch-unreachable-default.ll --- a/llvm/test/CodeGen/WebAssembly/switch-unreachable-default.ll +++ b/llvm/test/CodeGen/WebAssembly/switch-unreachable-default.ll @@ -43,10 +43,10 @@ ; CHECK: br_if 0 ; CHECK: block ; CHECK: block -; CHECK: br_table {1, 1, 1, 1, 1, 1, 1, 0} +; CHECK: br_table {1, 1, 0} ; CHECK: .LBB1_2 ; CHECK: end_block -; CHECK: br_table {0, 0, 0} +; CHECK: br_table {0, 0, 0, 0, 0, 0, 0, 0} ; CHECK: .LBB1_3 ; CHECK: end_block ; CHECK: unreachable diff --git a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll --- a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll +++ b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll @@ -47,7 +47,6 @@ ; CHECK-NEXT: movb $1, %bh ; CHECK-NEXT: movl $274877907, %ebp ## imm = 0x10624DD3 ; CHECK-NEXT: jmp LBB0_5 -; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_23: ## %bb7806 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: Ltmp16: @@ -153,15 +152,6 @@ ; CHECK-NEXT: calll __ZN12wxStringBase10ConcatSelfEmPKwm ; CHECK-NEXT: Ltmp11: ; CHECK-NEXT: jmp LBB0_5 -; CHECK-NEXT: LBB0_22: ## %bb5968 -; CHECK-NEXT: Ltmp2: -; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: movl $0, (%esp) -; CHECK-NEXT: calll __ZN8wxString6FormatEPKwz -; CHECK-NEXT: subl $4, %esp -; CHECK-NEXT: Ltmp3: -; CHECK-NEXT: jmp LBB0_27 ; 
CHECK-NEXT: LBB0_9: ## %bb5657 ; CHECK-NEXT: Ltmp13: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -170,6 +160,15 @@ ; CHECK-NEXT: movl %eax, (%esp) ; CHECK-NEXT: calll __ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE ; CHECK-NEXT: Ltmp14: +; CHECK-NEXT: jmp LBB0_27 +; CHECK-NEXT: LBB0_22: ## %bb5968 +; CHECK-NEXT: Ltmp2: +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, (%esp) +; CHECK-NEXT: calll __ZN8wxString6FormatEPKwz +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: Ltmp3: ; CHECK-NEXT: LBB0_27: ## %bb115.critedge.i ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: addl $28, %esp diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll --- a/llvm/test/CodeGen/X86/block-placement.ll +++ b/llvm/test/CodeGen/X86/block-placement.ll @@ -358,11 +358,11 @@ ; CHECK: %loop.header ; CHECK: %loop.body1 ; CHECK: %loop.body2 -; CHECK: %loop.body3 -; CHECK: %loop.inner1.begin ; CHECK: %loop.body4 ; CHECK: %loop.inner2.begin ; CHECK: %loop.inner2.begin +; CHECK: %loop.body3 +; CHECK: %loop.inner1.begin ; CHECK: %bail entry: diff --git a/llvm/test/CodeGen/X86/misched_phys_reg_assign_order.ll b/llvm/test/CodeGen/X86/misched_phys_reg_assign_order.ll --- a/llvm/test/CodeGen/X86/misched_phys_reg_assign_order.ll +++ b/llvm/test/CodeGen/X86/misched_phys_reg_assign_order.ll @@ -27,10 +27,10 @@ ; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: lock cmpxchg8b (%esi) ; CHECK-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; CHECK-NEXT: jne .LBB0_1 -; CHECK-NEXT: # %bb.2: # %k.end -; CHECK-NEXT: .LBB0_1: # %. +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: # %bb.1: # %. 
; CHECK-NEXT: calll m +; CHECK-NEXT: .LBB0_2: # %k.end entry: %p = load i8*, i8** @f %v1 = load atomic i8, i8* %p monotonic, align 1 diff --git a/llvm/test/CodeGen/X86/pr27501.ll b/llvm/test/CodeGen/X86/pr27501.ll --- a/llvm/test/CodeGen/X86/pr27501.ll +++ b/llvm/test/CodeGen/X86/pr27501.ll @@ -6,15 +6,15 @@ bb: invoke void @may_throw(i32 1) to label %postinvoke unwind label %cleanuppad +; CHECK: movq %rcx, [[SpillLoc:.*\(%rbp\)]] ; CHECK: movl $1, %ecx ; CHECK: callq may_throw postinvoke: ; preds = %bb store i64 19, i64* %result.repack, align 8 - -; CHECK: movq $19, (%rsi) +; CHECK: movq [[SpillLoc]], [[R1:%r..]] +; CHECK: movq $19, ([[R1]]) ; CHECK: movl $2, %ecx -; CHECK-NEXT: movq %rsi, -8(%rbp) ; CHECK-NEXT: callq may_throw invoke void @may_throw(i32 2) to label %assertFailed unwind label %catch.dispatch @@ -38,8 +38,8 @@ postinvoke27: ; preds = %try.success.or.caught store i64 42, i64* %result.repack, align 8 -; CHECK: movq -8(%rbp), %[[reload:r..]] -; CHECK-NEXT: movq $42, (%[[reload]]) +; CHECK: movq [[SpillLoc]], [[R2:%r..]] +; CHECK-NEXT: movq $42, ([[R2]]) ret void cleanuppad24: ; preds = %try.success.or.caught diff --git a/llvm/test/CodeGen/X86/pr37916.ll b/llvm/test/CodeGen/X86/pr37916.ll --- a/llvm/test/CodeGen/X86/pr37916.ll +++ b/llvm/test/CodeGen/X86/pr37916.ll @@ -7,7 +7,7 @@ define void @fn1() local_unnamed_addr { ; CHECK-LABEL: fn1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: .LBB0_1: # %if.end +; CHECK: .LBB0_1: # %if.end ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movl a+4, %eax ; CHECK-NEXT: orl a, %eax diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll --- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -68,7 +68,7 @@ ; CHECK-NEXT: je LBB0_55 ; CHECK-NEXT: ## %bb.6: ## %SyTime.exit2720 ; CHECK-NEXT: movq %rdx, %rbx -; CHECK-NEXT: movq %rdi, %r14 +; CHECK-NEXT: movq %rdi, %rbp ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), 
%rax ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: cmpq %rax, %rcx @@ -80,8 +80,7 @@ ; CHECK-NEXT: LBB0_8: ## %while.body.preheader ; CHECK-NEXT: imulq $1040, %rbx, %rax ## imm = 0x410 ; CHECK-NEXT: movq _syBuf@{{.*}}(%rip), %rcx -; CHECK-NEXT: leaq 8(%rcx,%rax), %rax -; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: leaq 8(%rcx,%rax), %rdx ; CHECK-NEXT: movl $1, %r15d ; CHECK-NEXT: movq _syCTRO@{{.*}}(%rip), %rax ; CHECK-NEXT: movb $1, %cl @@ -92,70 +91,71 @@ ; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: jne LBB0_9 ; CHECK-NEXT: ## %bb.10: ## %do.end -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: testb %bpl, %bpl +; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: xorl %r13d, %r13d +; CHECK-NEXT: testb %r13b, %r13b ; CHECK-NEXT: jne LBB0_11 ; CHECK-NEXT: ## %bb.12: ## %while.body200.preheader -; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: leaq {{.*}}(%rip), %r13 -; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill ; CHECK-NEXT: xorl %r12d, %r12d -; CHECK-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: leaq {{.*}}(%rip), %rdx +; CHECK-NEXT: leaq {{.*}}(%rip), %rbx +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: xorl %r14d, %r14d ; CHECK-NEXT: jmp LBB0_13 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_20: ## %sw.bb256 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl %ebp, %r12d +; CHECK-NEXT: movl %r13d, %r14d ; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: decl %r15d ; CHECK-NEXT: testl %r15d, %r15d -; CHECK-NEXT: movl %r12d, %ebp +; CHECK-NEXT: movl %r14d, %r13d ; CHECK-NEXT: jle LBB0_22 ; CHECK-NEXT: LBB0_13: ## %while.body200 ; CHECK-NEXT: ## =>This Loop Header: Depth=1 ; CHECK-NEXT: ## Child Loop BB0_29 Depth 2 ; CHECK-NEXT: ## Child Loop BB0_38 Depth 2 -; 
CHECK-NEXT: leal -268(%rbp), %eax +; CHECK-NEXT: leal -268(%r13), %eax ; CHECK-NEXT: cmpl $105, %eax ; CHECK-NEXT: ja LBB0_14 ; CHECK-NEXT: ## %bb.56: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movslq (%r13,%rax,4), %rax -; CHECK-NEXT: addq %r13, %rax +; CHECK-NEXT: movslq (%rbx,%rax,4), %rax +; CHECK-NEXT: addq %rbx, %rax ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: movl %ebp, %r12d +; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: movl %r13d, %r14d ; CHECK-NEXT: jne LBB0_21 ; CHECK-NEXT: jmp LBB0_55 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_14: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leal 1(%rbp), %eax +; CHECK-NEXT: leal 1(%r13), %eax ; CHECK-NEXT: cmpl $21, %eax ; CHECK-NEXT: ja LBB0_20 ; CHECK-NEXT: ## %bb.15: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $-1, %r12d -; CHECK-NEXT: leaq {{.*}}(%rip), %rcx -; CHECK-NEXT: movslq (%rcx,%rax,4), %rax -; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: movl $-1, %r14d +; CHECK-NEXT: movslq (%rdx,%rax,4), %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $1, %r12d +; CHECK-NEXT: movl $1, %r14d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_26: ## %sw.bb474 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: ## implicit-def: $r14 +; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: ## implicit-def: $rbp ; CHECK-NEXT: jne LBB0_34 ; CHECK-NEXT: ## %bb.27: ## %do.body479.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: ## implicit-def: $r14 +; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: ## implicit-def: $rbp ; CHECK-NEXT: jne LBB0_34 ; CHECK-NEXT: ## %bb.28: ## 
%land.rhs485.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 @@ -164,8 +164,8 @@ ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_32: ## %do.body479.backedge ; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 -; CHECK-NEXT: leaq 1(%r14), %rax -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: leaq 1(%rbp), %rax +; CHECK-NEXT: testb %r12b, %r12b ; CHECK-NEXT: je LBB0_33 ; CHECK-NEXT: LBB0_29: ## %land.rhs485 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 @@ -174,14 +174,14 @@ ; CHECK-NEXT: js LBB0_55 ; CHECK-NEXT: ## %bb.30: ## %cond.true.i.i2780 ; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 -; CHECK-NEXT: movq %rax, %r14 -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: movq %rax, %rbp +; CHECK-NEXT: testb %r12b, %r12b ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: ## %bb.31: ## %lor.rhs500 ; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 ; CHECK-NEXT: movl $256, %esi ## imm = 0x100 ; CHECK-NEXT: callq ___maskrune -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: testb %r12b, %r12b ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: jmp LBB0_34 ; CHECK-NEXT: LBB0_45: ## %sw.bb1134 @@ -192,22 +192,22 @@ ; CHECK-NEXT: jb LBB0_55 ; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill -; CHECK-NEXT: movl $268, %r12d ## imm = 0x10C +; CHECK-NEXT: movl $268, %r14d ## imm = 0x10C ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_40: ## %sw.bb566 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $20, %r12d +; CHECK-NEXT: movl $20, %r14d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_19: ## %sw.bb243 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $2, %r12d +; CHECK-NEXT: movl $2, %r14d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_33: ## %if.end517.loopexitsplit ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: incq %r14 +; CHECK-NEXT: incq %rbp ; CHECK-NEXT: LBB0_34: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leal -324(%r12), %eax +; CHECK-NEXT: leal 
-324(%r14), %eax ; CHECK-NEXT: cmpl $59, %eax ; CHECK-NEXT: ja LBB0_35 ; CHECK-NEXT: ## %bb.57: ## %if.end517 @@ -217,11 +217,11 @@ ; CHECK-NEXT: jb LBB0_38 ; CHECK-NEXT: LBB0_35: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $11, %r12d +; CHECK-NEXT: cmpl $11, %r14d ; CHECK-NEXT: je LBB0_38 ; CHECK-NEXT: ## %bb.36: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $24, %r12d +; CHECK-NEXT: cmpl $24, %r14d ; CHECK-NEXT: je LBB0_38 ; CHECK-NEXT: ## %bb.37: ## %if.then532 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 @@ -231,14 +231,14 @@ ; CHECK-NEXT: LBB0_38: ## %for.cond534 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: testb %r12b, %r12b ; CHECK-NEXT: jne LBB0_38 ; CHECK-NEXT: ## %bb.39: ## %for.cond542.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: movb $0, (%r14) -; CHECK-NEXT: movl %ebp, %r12d -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload +; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: movb $0, (%rbp) +; CHECK-NEXT: movl %r13d, %r14d +; CHECK-NEXT: leaq {{.*}}(%rip), %rdx ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_42: ## %while.cond864 @@ -254,30 +254,32 @@ ; CHECK-NEXT: jmp LBB0_25 ; CHECK-NEXT: LBB0_11: ; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill -; CHECK-NEXT: xorl %r12d, %r12d +; CHECK-NEXT: xorl %r14d, %r14d ; CHECK-NEXT: LBB0_22: ## %while.end1465 -; CHECK-NEXT: incl %r12d -; CHECK-NEXT: cmpl $16, %r12d +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: cmpl $16, %r14d ; CHECK-NEXT: ja LBB0_50 ; CHECK-NEXT: ## %bb.23: ## %while.end1465 ; CHECK-NEXT: movl $83969, %eax ## imm = 0x14801 -; CHECK-NEXT: btl %r12d, %eax +; CHECK-NEXT: btl %r14d, %eax ; CHECK-NEXT: jae LBB0_50 ; CHECK-NEXT: ## %bb.24: -; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: xorl %ebp, %ebp +; 
CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload ; CHECK-NEXT: LBB0_48: ## %if.then1477 ; CHECK-NEXT: movl $1, %edx ; CHECK-NEXT: callq _write -; CHECK-NEXT: subq %rbx, %r14 +; CHECK-NEXT: subq %rbp, %rbx ; CHECK-NEXT: movq _syHistory@{{.*}}(%rip), %rax -; CHECK-NEXT: leaq 8189(%r14,%rax), %rax +; CHECK-NEXT: leaq 8189(%rbx,%rax), %rax ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_49: ## %for.body1723 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: decq %rax ; CHECK-NEXT: jmp LBB0_49 ; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit -; CHECK-NEXT: movq %r14, %rbx +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload +; CHECK-NEXT: movq %rbx, %rbp ; CHECK-NEXT: jmp LBB0_48 ; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader ; CHECK-NEXT: xorl %eax, %eax @@ -298,17 +300,18 @@ ; CHECK-NEXT: ## %bb.51: ## %for.body1664.lr.ph ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ebp ## 4-byte Reload ; CHECK-NEXT: jne LBB0_54 ; CHECK-NEXT: ## %bb.52: ## %while.body1679.preheader -; CHECK-NEXT: incl {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: incl %ebp +; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_53: ## %while.body1679 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload -; CHECK-NEXT: movq (%rax), %rdi +; CHECK-NEXT: movq (%rbx), %rdi ; CHECK-NEXT: callq _fileno -; CHECK-NEXT: movslq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 4-byte Folded Reload -; CHECK-NEXT: leal 1(%rax), %ecx -; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movslq %ebp, %rax +; CHECK-NEXT: leal 1(%rax), %ebp ; CHECK-NEXT: cmpq %rax, %rax ; CHECK-NEXT: jl LBB0_53 ; CHECK-NEXT: LBB0_54: ## %while.cond1683.preheader diff --git a/llvm/test/Transforms/JumpThreading/thread-prob-3.ll 
b/llvm/test/Transforms/JumpThreading/thread-prob-3.ll --- a/llvm/test/Transforms/JumpThreading/thread-prob-3.ll +++ b/llvm/test/Transforms/JumpThreading/thread-prob-3.ll @@ -5,8 +5,8 @@ ; call DuplicateCondBranchOnPHIIntoPred(bb3, {bb2}). ; ; CHECK-LABEL: ---- Branch Probability Info : foo -; CHECK: set edge bb2 -> 0 successor probability to 0x7fffffff / 0x80000000 = 100.00% -; CHECK-NEXT: set edge bb2 -> 1 successor probability to 0x00000001 / 0x80000000 = 0.00% +; CHECK: set edge bb2 -> 0 successor probability to 0x80000000 / 0x80000000 = 100.00% +; CHECK-NEXT: set edge bb2 -> 1 successor probability to 0x00000000 / 0x80000000 = 0.00% define void @foo(i1 %f0, i1 %f1, i1 %f2) !prof !{!"function_entry_count", i64 0} { ; CHECK-LABEL: @foo( bb1: