Index: lib/CodeGen/MachineBlockPlacement.cpp
===================================================================
--- lib/CodeGen/MachineBlockPlacement.cpp
+++ lib/CodeGen/MachineBlockPlacement.cpp
@@ -40,6 +40,7 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/CodeGen/TailDuplicator.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/CommandLine.h"
@@ -49,6 +50,8 @@
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
+#include <tuple>
+#include <utility>

 using namespace llvm;

 #define DEBUG_TYPE "block-placement"
@@ -266,6 +269,12 @@
   /// \brief A typedef for a block filter set.
   typedef SmallSetVector<MachineBasicBlock *, 16> BlockFilterSet;

+  /// Pair struct containing basic block and tail-duplication profitability.
+  struct BlockAndTailDupResult {
+    MachineBasicBlock *BB;
+    bool ShouldTailDup;
+  };
+
   /// \brief work lists of blocks that are ready to be laid out
   SmallVector<MachineBasicBlock *, 16> BlockWorkList;
   SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
@@ -293,9 +302,12 @@
   /// \brief A handle to the target's lowering info.
   const TargetLoweringBase *TLI;

-  /// \brief A handle to the post dominator tree.
+  /// \brief A handle to the dominator tree.
   MachineDominatorTree *MDT;

+  /// \brief A handle to the post dominator tree.
+  MachinePostDominatorTree *MPDT;
+
   /// \brief Duplicator used to duplicate tails during placement.
   ///
   /// Placement decisions can open up new tail duplication opportunities, but
@@ -368,9 +380,9 @@
                                   BlockChain &SuccChain, BranchProbability SuccProb,
                                   BranchProbability RealSuccProb, BlockChain &Chain,
                                   const BlockFilterSet *BlockFilter);
-  MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
-                                         BlockChain &Chain,
-                                         const BlockFilterSet *BlockFilter);
+  BlockAndTailDupResult selectBestSuccessor(MachineBasicBlock *BB,
+                                            BlockChain &Chain,
+                                            const BlockFilterSet *BlockFilter);
   MachineBasicBlock *
   selectBestCandidateBlock(BlockChain &Chain,
                            SmallVectorImpl<MachineBasicBlock *> &WorkList);
@@ -403,6 +415,18 @@
   void buildCFGChains();
   void optimizeBranches();
   void alignBlocks();
+  bool shouldTailDuplicate(MachineBasicBlock *BB);
+  /// Check the edge frequencies to see if tail duplication will increase
+  /// fallthroughs.
+  bool isProfitableToTailDup(
+      MachineBasicBlock *BB, MachineBasicBlock *Succ,
+      BranchProbability AdjustedSumProb,
+      BlockChain &Chain, const BlockFilterSet *BlockFilter);
+  /// Returns true if a block can tail duplicate into all unplaced
+  /// predecessors. Filters based on loop.
+  bool canTailDuplicateUnplacedPreds(
+      MachineBasicBlock *BB, MachineBasicBlock *Succ,
+      BlockChain &Chain, const BlockFilterSet *BlockFilter);

 public:
   static char ID; // Pass identification, replacement for typeid
@@ -416,6 +440,8 @@
     AU.addRequired<MachineBranchProbabilityInfo>();
     AU.addRequired<MachineBlockFrequencyInfo>();
     AU.addRequired<MachineDominatorTree>();
+    if (TailDupPlacement)
+      AU.addRequired<MachinePostDominatorTree>();
     AU.addRequired<MachineLoopInfo>();
     AU.addRequired<TargetPassConfig>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
@@ -430,6 +456,7 @@
 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement",
                     "Branch Probability Basic Block Placement", false, false)
@@ -561,6 +588,184 @@
   return SuccProb;
 }

+/// Check if a block should be tail duplicated.
+/// \p BB Block to check.
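+/// \returns true if \p BB is a viable candidate for duplication into its
+/// layout predecessors.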
+bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {
+  // Blocks with single successors don't create additional fallthrough
+  // opportunities. Don't duplicate them. TODO: When conditional exits are
+  // analyzable, allow them to be duplicated.
+  bool IsSimple = TailDup.isSimpleBB(BB);
+
+  if (BB->succ_size() == 1)
+    return false;
+  return TailDup.shouldTailDuplicate(IsSimple, *BB);
+}
+
+/// Check the edge frequencies to see if tail duplication will increase
+/// fallthroughs. It only makes sense to call this function when
+/// \p Succ would not be chosen otherwise. Tail duplication of \p Succ is
+/// always locally profitable if we would have picked \p Succ without
+/// considering duplication.
+bool MachineBlockPlacement::isProfitableToTailDup(
+    MachineBasicBlock *BB, MachineBasicBlock *Succ,
+    BranchProbability AdjustedSumProb,
+    BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+  // We need to do a probability calculation to make sure this is profitable.
+  // First: does Succ have a successor that post-dominates? This affects the
+  // calculation. The 2 relevant cases are:
+  //     BB         BB
+  //     | \Qout    | \Qout
+  //    P|  C      P|  C
+  //     =   C'     =   C'
+  //     |  /Qin    |  /Qin
+  //     | /        | /
+  //     Succ       Succ
+  //     / \        |  \  V
+  //   U/   =V      |U  \
+  //   /     \      =    D
+  //  D       E     |   /
+  //                |  /
+  //                | /
+  //                PDom
+  //  '=' : Branch taken for that CFG edge
+  // In the second case, placing Succ while duplicating it into C prevents the
+  // fallthrough of Succ into either D or PDom, because they now have C as an
+  // unplaced predecessor.
+
+  // Start by figuring out which case we fall into.
+  MachineBasicBlock *PDom = nullptr;
+  SmallVector<MachineBasicBlock *, 4> SuccSuccs;
+  // Only scan the relevant successors.
+  auto AdjustedSuccSumProb =
+      collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
+  // If there are no more successors, it is profitable to copy, as it strictly
+  // increases fallthrough.
+  if (SuccSuccs.size() == 0)
+    return true;
+  auto BestSuccSucc = BranchProbability::getZero();
+  // Find the PDom or the best Succ if no PDom exists.
+  for (MachineBasicBlock *SuccSucc : SuccSuccs) {
+    auto Prob = MBPI->getEdgeProbability(Succ, SuccSucc);
+    if (Prob > BestSuccSucc)
+      BestSuccSucc = Prob;
+    if (PDom == nullptr)
+      if (MPDT->dominates(SuccSucc, Succ)) {
+        PDom = SuccSucc;
+        break;
+      }
+  }
+  // For the comparisons, we need to know Succ's best incoming edge that isn't
+  // from BB.
+  auto SuccBestPred = BlockFrequency(0);
+  for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
+    if (SuccPred == Succ || SuccPred == BB
+        || BlockToChain[SuccPred] == &Chain
+        || (BlockFilter && !BlockFilter->count(SuccPred)))
+      continue;
+    auto Freq = MBFI->getBlockFreq(SuccPred)
+        * MBPI->getEdgeProbability(SuccPred, Succ);
+    if (Freq > SuccBestPred)
+      SuccBestPred = Freq;
+  }
+  auto BBFreq = MBFI->getBlockFreq(BB);
+  auto SuccFreq = MBFI->getBlockFreq(Succ);
+  BranchProbability PProb = MBPI->getEdgeProbability(BB, Succ);
+  BlockFrequency P = BBFreq * PProb;
+  // At this point, we don't know which block would be chosen instead of Succ.
+  // Using Qout as (1 - P) is conservative.
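+  // For illustration (hypothetical numbers, not part of this change): with
+  // BBFreq = 100, Prob(BB->Succ) = 0.4 and AdjustedSumProb = 1, we get
+  // P = 40 and Qout = 100 * (1 - 0.4) = 60. If Succ's hottest other unplaced
+  // incoming edge has frequency 20, then Qin = 20. In the no-PDom comparison
+  // below, with SuccFreq = 60, AdjustedSuccSumProb = 1 and UProb = VProb =
+  // 0.5, BaseCost = P + V = 40 + 30 = 70 while DupCost = 60 + 20 * 0.5 + 40 =
+  // 110, so duplication would be rejected.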
+  BlockFrequency Qout = BBFreq * (AdjustedSumProb - PProb);
+  // Qin is Succ's best unplaced incoming edge that isn't BB.
+  BlockFrequency Qin = SuccBestPred;
+  // If it doesn't have a post-dominating successor, here is the calculation:
+  //     BB          BB
+  //     | \Qout     | \
+  //    P|  C        |  =
+  //     =   C'      |   C
+  //     |  /Qin     |   |
+  //     | /         |   C' (+Succ)
+  //     Succ        Succ /|
+  //     / \         | \/ |
+  //   U/   =V       = /= =
+  //   /     \       | /  \|
+  //  D       E      D      E
+  //  '=' : Branch taken for that CFG edge
+  // Cost in the first case is: P + V
+  // For this calculation, we always assume P > Qout. If Qout > P,
+  // the result of this function will be ignored at the caller.
+  // Cost in the second case is: Qout + Qin * V + P * U + P * V
+  // TODO(iteratee): If we lay out D after Succ, the P * U term
+  // goes away. This logic is coming in D28522.
+  if (PDom == nullptr || !Succ->isSuccessor(PDom)) {
+    BranchProbability UProb = BestSuccSucc;
+    BranchProbability VProb = AdjustedSuccSumProb - UProb;
+    BlockFrequency V = SuccFreq * VProb;
+    BlockFrequency QinV = Qin * VProb;
+    BlockFrequency BaseCost = P + V;
+    BlockFrequency DupCost = Qout + QinV + P * AdjustedSuccSumProb;
+    return (BaseCost > DupCost);
+  }
+  BranchProbability UProb = MBPI->getEdgeProbability(Succ, PDom);
+  BranchProbability VProb = AdjustedSuccSumProb - UProb;
+  BlockFrequency U = SuccFreq * UProb;
+  BlockFrequency V = SuccFreq * VProb;
+  // If there is a post-dominating successor, here is the calculation:
+  //     BB          BB          BB          BB
+  //     | \Qout     | \         | \Qout     | \
+  //     |P C        |  =        |P C        |  =
+  //     =   C'      |P C        =   C'      |P C
+  //     | /Qin      |  |        | /Qin      |  |
+  //     | /         |  C' (+Succ) | /       |  C' (+Succ)
+  //     Succ        Succ /|     Succ        Succ /|
+  //     | \  V      | \/ |      | \  V      | \/ |
+  //     |U \        |U /\ |     |U =        |U /\ |
+  //     =   D       = =  =|     |   D       | =  =|
+  //     |  /        |/   D      |  /        |/   D
+  //     | /         |   /       |  =        |  /
+  //     |/          |  /        |/          |  =
+  //     Dom         Dom         Dom         Dom
+  //  '=' : Branch taken for that CFG edge
+  // The cost for taken branches in the first case is P + U.
+  // The cost in the second case (assuming independence), given the layout:
+  //   BB, Succ, (C+Succ), D, Dom
+  // is Qout + P * V + Qin * U.
+  // Compare P + U vs Qout + P * V + Qin * U.
+  //
+  // The 3rd and 4th cases cover when Dom would be chosen to follow Succ.
+  //
+  // For the 3rd case, the cost is P + 2 * V.
+  // For the 4th case, the cost is Qout + Qin * U + P * V + V.
+  // We choose 4 over 3 when (P + V) > Qout + Qin * U + P * V.
+  if (UProb > AdjustedSuccSumProb / 2
+      && !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom],
+                                     UProb, UProb, Chain, BlockFilter))
+    // Cases 3 & 4
+    return (P + V) > (Qout + Qin * UProb + P * VProb);
+  // Cases 1 & 2
+  return (P + U) > (Qout + Qin * UProb + P * AdjustedSuccSumProb);
+}
+
+/// When the option TailDupPlacement is on, this method checks if the
+/// fallthrough candidate block \p Succ (of block \p BB) can be tail-duplicated
+/// into all of its unplaced, unfiltered predecessors that are not BB.
+bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
+    MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &Chain,
+    const BlockFilterSet *BlockFilter) {
+  if (!shouldTailDuplicate(Succ))
+    return false;
+
+  for (MachineBasicBlock *Pred : Succ->predecessors()) {
+    // Make sure all unplaced and unfiltered predecessors can be
+    // tail-duplicated into.
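+    // A predecessor needs no duplicate if it is BB itself, is already placed
+    // in this chain, or falls outside the current loop filter.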
+    if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred))
+        || BlockToChain[Pred] == &Chain)
+      continue;
+    if (!TailDup.canTailDuplicate(Succ, Pred))
+      return false;
+  }
+  return true;
+}
+
 /// When the option OutlineOptionalBranches is on, this method
 /// checks if the fallthrough candidate block \p Succ (of block
 /// \p BB) also has other unscheduled predecessor blocks which
@@ -609,11 +814,11 @@
   if (Succ1->isSuccessor(Succ2) || Succ2->isSuccessor(Succ1)) {
     /* See case 1 below for the cost analysis. For BB->Succ to
      * be taken with smaller cost, the following needs to hold:
-     *   Prob(BB->Succ) > 2* Prob(BB->Pred)
-     *   So the threshold T
-     *   T = 2 * (1-Prob(BB->Pred). Since T + Prob(BB->Pred) == 1,
-     *   We have  T + T/2 = 1, i.e. T = 2/3. Also adding user specified
-     *   branch bias, we have
+     *   Prob(BB->Succ) > 2 * Prob(BB->Pred)
+     *   So the threshold T in the calculation below satisfies
+     *   (1-T) * Prob(BB->Succ) > T * Prob(BB->Pred),
+     *   so T / (1 - T) = 2, yielding T = 2/3.
+     *   Also adding user specified branch bias, we have
      *   T = (2/3)*(ProfileLikelyProb/50)
      *     = (2*ProfileLikelyProb)/150)
      */
@@ -625,6 +830,12 @@

 /// Checks to see if the layout candidate block \p Succ has a better layout
 /// predecessor than \c BB. If yes, returns true.
+/// \p SuccProb: The probability adjusted for only remaining blocks.
+///   Only used for logging.
+/// \p RealSuccProb: The un-adjusted probability.
+/// \p Chain: The chain that BB belongs to and Succ is being considered for.
+/// \p BlockFilter: if non-null, the set of blocks that make up the loop being
+///   considered.
 bool MachineBlockPlacement::hasBetterLayoutPredecessor(
     MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &SuccChain,
     BranchProbability SuccProb, BranchProbability RealSuccProb,
@@ -756,13 +967,15 @@
   for (MachineBasicBlock *Pred : Succ->predecessors()) {
     if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
         (BlockFilter && !BlockFilter->count(Pred)) ||
-        BlockToChain[Pred] == &Chain)
+        BlockToChain[Pred] == &Chain ||
+        // This check is redundant except for lookahead. This function is
+        // called for lookahead by isProfitableToTailDup when BB hasn't been
+        // placed yet.
+        (Pred == BB))
       continue;
     // Do backward checking.
     // For all cases above, we need a backward checking to filter out edges that
-    // are not 'strongly' biased. With profile data available, the check is
-    // mostly redundant for case 2 (when threshold prob is set at 50%) unless S
-    // has more than two successors.
+    // are not 'strongly' biased.
     //  BB  Pred
     //   \ /
     //  Succ
@@ -798,14 +1011,15 @@
 /// breaking CFG structure, but cave and break such structures in the case of
 /// very hot successor edges.
 ///
-/// \returns The best successor block found, or null if none are viable.
-MachineBasicBlock *
+/// \returns The best successor block found, or null if none are viable, along
+/// with a boolean indicating if tail duplication is necessary.
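+/// ShouldTailDup is only set when the returned block was chosen by way of
+/// tail duplication; it is always false when no block is returned.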
+MachineBlockPlacement::BlockAndTailDupResult
 MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
                                            BlockChain &Chain,
                                            const BlockFilterSet *BlockFilter) {
   const BranchProbability HotProb(StaticLikelyProb, 100);

-  MachineBasicBlock *BestSucc = nullptr;
+  BlockAndTailDupResult BestSucc = { nullptr, false };
   auto BestProb = BranchProbability::getZero();

   SmallVector<MachineBasicBlock *, 4> Successors;
@@ -813,6 +1027,12 @@
       collectViableSuccessors(BB, Chain, BlockFilter, Successors);

   DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB) << "\n");
+
+  // For blocks with CFG violations, we may be able to lay them out anyway with
+  // tail-duplication. We keep this vector so we can perform the probability
+  // calculations the minimum number of times.
+  SmallVector<std::tuple<BranchProbability, MachineBasicBlock *>, 4>
+      DupCandidates;
   for (MachineBasicBlock *Succ : Successors) {
     auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
     BranchProbability SuccProb =
@@ -820,15 +1040,21 @@

     // This heuristic is off by default.
     if (shouldPredBlockBeOutlined(BB, Succ, Chain, BlockFilter, SuccProb,
-                                  HotProb))
-      return Succ;
+                                  HotProb)) {
+      BestSucc.BB = Succ;
+      return BestSucc;
+    }

     BlockChain &SuccChain = *BlockToChain[Succ];
     // Skip the edge \c BB->Succ if block \c Succ has a better layout
     // predecessor that yields lower global cost.
     if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
-                                   Chain, BlockFilter))
+                                   Chain, BlockFilter)) {
+      // If tail duplication would make Succ profitable, place it.
+      if (TailDupPlacement && shouldTailDuplicate(Succ))
+        DupCandidates.push_back(std::make_tuple(SuccProb, Succ));
       continue;
+    }

     DEBUG(
         dbgs() << "    Candidate: " << getBlockName(Succ) << ", probability: "
@@ -836,17 +1062,52 @@
                << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
                << "\n");

-    if (BestSucc && BestProb >= SuccProb) {
+    if (BestSucc.BB && BestProb >= SuccProb) {
       DEBUG(dbgs() << "    Not the best candidate, continuing\n");
       continue;
     }

     DEBUG(dbgs() << "    Setting it as best candidate\n");
-    BestSucc = Succ;
+    BestSucc.BB = Succ;
     BestProb = SuccProb;
   }

-  if (BestSucc)
-    DEBUG(dbgs() << "    Selected: " << getBlockName(BestSucc) << "\n");
+  // Handle the tail duplication candidates in order of decreasing probability.
+  // Stop at the first one that is profitable. Also stop if they are less
+  // profitable than BestSucc. Position is important because we preserve it and
+  // prefer first best match. Here we aren't comparing in order, so we capture
+  // the position instead.
+  if (DupCandidates.size() != 0) {
+    auto cmp =
+        [](const std::tuple<BranchProbability, MachineBasicBlock *> &a,
+           const std::tuple<BranchProbability, MachineBasicBlock *> &b) {
+          return std::get<0>(a) > std::get<0>(b);
+        };
+    std::stable_sort(DupCandidates.begin(), DupCandidates.end(), cmp);
+  }
+  for (auto &Tup : DupCandidates) {
+    BranchProbability DupProb;
+    MachineBasicBlock *Succ;
+    std::tie(DupProb, Succ) = Tup;
+    if (DupProb < BestProb)
+      break;
+    if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter)
+        // If tail duplication gives us fallthrough when we otherwise wouldn't
+        // have it, that is a strict gain.
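+        // Otherwise, only duplicate if the frequency-based cost model says
+        // the copy pays for itself.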
+        && (BestSucc.BB == nullptr
+            || isProfitableToTailDup(BB, Succ, AdjustedSumProb, Chain,
+                                     BlockFilter))) {
+      DEBUG(
+          dbgs() << "    Candidate: " << getBlockName(Succ) << ", probability: "
+                 << DupProb
+                 << " (Tail Duplicate)\n");
+      BestSucc.BB = Succ;
+      BestSucc.ShouldTailDup = true;
+      break;
+    }
+  }
+
+  if (BestSucc.BB)
+    DEBUG(dbgs() << "    Selected: " << getBlockName(BestSucc.BB) << "\n");

   return BestSucc;
 }
@@ -995,7 +1256,11 @@

     // Look for the best viable successor if there is one to place immediately
     // after this block.
-    MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
+    auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
+    MachineBasicBlock *BestSucc = Result.BB;
+    bool ShouldTailDup = Result.ShouldTailDup;
+    if (TailDupPlacement)
+      ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc));

     // If an immediate successor isn't available, look for the best viable
     // block among those we've identified as not violating the loop's CFG at
@@ -1016,7 +1281,7 @@

     // Placement may have changed tail duplication opportunities.
     // Check for that now.
-    if (TailDupPlacement && BestSucc) {
+    if (TailDupPlacement && BestSucc && ShouldTailDup) {
       // If the chosen successor was duplicated into all its predecessors,
       // don't bother laying it out, just go round the loop again with BB as
       // the chain end.
@@ -1908,13 +2173,8 @@
     DuplicatedToLPred = false;
   DEBUG(dbgs() << "Redoing tail duplication for Succ#"
                << BB->getNumber() << "\n");
-  bool IsSimple = TailDup.isSimpleBB(BB);
-  // Blocks with single successors don't create additional fallthrough
-  // opportunities. Don't duplicate them. TODO: When conditional exits are
-  // analyzable, allow them to be duplicated.
-  if (!IsSimple && BB->succ_size() == 1)
-    return false;
-  if (!TailDup.shouldTailDuplicate(IsSimple, *BB))
+
+  if (!shouldTailDuplicate(BB))
     return false;
   // This has to be a callback because none of it can be done after
   // BB is deleted.
@@ -1967,6 +2227,7 @@
       llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback);

   SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
+  bool IsSimple = TailDup.isSimpleBB(BB);
   TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred,
                                  &DuplicatedPreds, &RemovalCallbackRef);

@@ -2007,12 +2268,14 @@
   TII = MF.getSubtarget().getInstrInfo();
   TLI = MF.getSubtarget().getTargetLowering();
   MDT = &getAnalysis<MachineDominatorTree>();
+  MPDT = nullptr;

   // Initialize PreferredLoopExit to nullptr here since it may never be set if
   // there are no MachineLoops.
   PreferredLoopExit = nullptr;

   if (TailDupPlacement) {
+    MPDT = &getAnalysis<MachinePostDominatorTree>();
     unsigned TailDupSize = TailDuplicatePlacementThreshold;
     if (MF.getFunction()->optForSize())
       TailDupSize = 1;
@@ -2043,6 +2306,8 @@
     BlockToChain.clear();
     // Must redo the dominator tree if blocks were changed.
     MDT->runOnMachineFunction(MF);
+    if (MPDT)
+      MPDT->runOnMachineFunction(MF);
     ChainAllocator.DestroyAll();
     buildCFGChains();
   }
Index: test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
===================================================================
--- test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
+++ test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
@@ -664,11 +664,12 @@
 ; No realignment in the prologue.
 ; CHECK-NOT: and
 ; CHECK-NOT: 0xffffffffffffffe0
-; CHECK: tbz {{.*}} .[[LABEL:.*]]
+; CHECK: tbnz {{.*}} .[[LABEL:.*]]
+; CHECK: ret
+; CHECK: .[[LABEL]]:
 ; Stack is realigned in a non-entry BB.
; CHECK: sub [[REG:x[01-9]+]], sp, #64 ; CHECK: and sp, [[REG]], #0xffffffffffffffe0 -; CHECK: .[[LABEL]]: ; CHECK: ret @@ -687,14 +688,15 @@ ; CHECK-LABEL: realign_conditional2 ; Extra realignment in the prologue (performance issue). -; CHECK: tbz {{.*}} .[[LABEL:.*]] +; CHECK: tbnz {{.*}} .[[LABEL:.*]] +; CHECK: ret +; CHECK: .[[LABEL]]: ; CHECK: sub x9, sp, #32 // =32 ; CHECK: and sp, x9, #0xffffffffffffffe0 ; CHECK: mov x19, sp ; Stack is realigned in a non-entry BB. ; CHECK: sub [[REG:x[01-9]+]], sp, #64 ; CHECK: and sp, [[REG]], #0xffffffffffffffe0 -; CHECK: .[[LABEL]]: ; CHECK: ret attributes #0 = { "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } Index: test/CodeGen/AArch64/addsub.ll =================================================================== --- test/CodeGen/AArch64/addsub.ll +++ test/CodeGen/AArch64/addsub.ll @@ -140,12 +140,17 @@ test5: ; CHECK: cmn {{w[0-9]+}}, #444 -; CHECK: b.gt [[RET]] +; CHECK: b.le [[TEST6:.?LBB[0-9]+_[0-9]+]] %newval5 = add i32 %val, 4 store i32 %newval5, i32* @var_i32 %cmp_neg_uge = icmp sgt i32 %val2, -444 br i1 %cmp_neg_uge, label %ret, label %test6 +; CHECK: {{^}}[[RET]]: +; CHECK: ret +; CHECK: {{^}}[[TEST6]]: +; CHECK: ret + test6: %newval6 = add i32 %val, 5 store i32 %newval6, i32* @var_i32 Index: test/CodeGen/AArch64/arm64-atomic.ll =================================================================== --- test/CodeGen/AArch64/arm64-atomic.ll +++ test/CodeGen/AArch64/arm64-atomic.ll @@ -9,10 +9,10 @@ ; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] ; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] -; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: ret ; CHECK-NEXT: [[FAILBB]]: ; CHECK-NEXT: clrex -; CHECK-NEXT: [[EXITBB]]: +; CHECK-NEXT: ret %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire %val = extractvalue { i32, i1 } %pair, 0 ret i32 %val @@ -27,10 +27,12 @@ ; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] ; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], [[NEW]], [x0] ; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] -; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: mov x0, x[[ADDR]] +; CHECK-NEXT: ret ; CHECK-NEXT: [[FAILBB]]: ; CHECK-NEXT: clrex -; CHECK-NEXT: [[EXITBB]]: +; CHECK-NEXT: mov x0, x[[ADDR]] +; CHECK-NEXT: ret %new = load i32, i32* %pnew %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire %val = extractvalue { i32, i1 } %pair, 0 @@ -41,15 +43,15 @@ ; CHECK-LABEL: val_compare_and_swap_rel: ; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0 ; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: -; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]] +; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]] ; CHECK-NEXT: cmp [[RESULT]], w1 ; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] -; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]] +; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] -; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: ret ; CHECK-NEXT: [[FAILBB]]: ; CHECK-NEXT: clrex -; CHECK-NEXT: [[EXITBB]]: +; CHECK-NEXT: ret %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic %val = extractvalue { i32, i1 } %pair, 0 ret i32 %val @@ -64,10 +66,10 @@ ; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] ; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], x2, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] -; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: ret ; CHECK-NEXT: [[FAILBB]]: ; CHECK-NEXT: clrex -; 
CHECK-NEXT: [[EXITBB]]: +; CHECK-NEXT: ret %pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic %val = extractvalue { i64, i1 } %pair, 0 ret i64 %val Index: test/CodeGen/AArch64/arm64-ccmp.ll =================================================================== --- test/CodeGen/AArch64/arm64-ccmp.ll +++ test/CodeGen/AArch64/arm64-ccmp.ll @@ -4,10 +4,10 @@ ; CHECK: single_same ; CHECK: cmp w0, #5 ; CHECK-NEXT: ccmp w1, #17, #4, ne -; CHECK-NEXT: b.ne +; CHECK-NEXT: b.eq +; CHECK: %if.end ; CHECK: %if.then ; CHECK: bl _foo -; CHECK: %if.end define i32 @single_same(i32 %a, i32 %b) nounwind ssp { entry: %cmp = icmp eq i32 %a, 5 @@ -27,10 +27,10 @@ ; CHECK: single_different ; CHECK: cmp w0, #6 ; CHECK-NEXT: ccmp w1, #17, #0, ge -; CHECK-NEXT: b.eq +; CHECK-NEXT: b.ne +; CHECK: %if.end ; CHECK: %if.then ; CHECK: bl _foo -; CHECK: %if.end define i32 @single_different(i32 %a, i32 %b) nounwind ssp { entry: %cmp = icmp sle i32 %a, 5 @@ -51,7 +51,7 @@ ; CHECK: cmp ; CHECK: b.eq ; CHECK: cmp -; CHECK: b.gt +; CHECK: b.le define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp { entry: %cmp = icmp eq i32 %a, 5 @@ -78,7 +78,7 @@ ; CHECK: cmp ; CHECK: b.eq ; CHECK: cmp -; CHECK: tbz +; CHECK: tbnz define i32 @single_flagclobber_tbz(i32 %a, i32 %b) nounwind ssp { entry: %cmp = icmp eq i32 %a, 5 @@ -108,10 +108,10 @@ ; CHECK: cmp w0, #1 ; CHECK: sdiv [[DIVRES:w[0-9]+]], w1, w0 ; CHECK: ccmp [[DIVRES]], #16, #0, ge -; CHECK: b.gt [[BLOCK:LBB[0-9_]+]] -; CHECK: bl _foo -; CHECK: [[BLOCK]]: +; CHECK: b.le [[BLOCK:LBB[0-9_]+]] ; CHECK: orr w0, wzr, #0x7 +; CHECK: [[BLOCK]]: +; CHECK: bl _foo define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp { entry: %cmp = icmp sgt i32 %a, 0 @@ -135,7 +135,7 @@ ; CHECK: cmp ; CHECK-NOT: b. ; CHECK: fccmp {{.*}}, #8, ge -; CHECK: b.lt +; CHECK: b.ge define i32 @single_fcmp(i32 %a, float %b) nounwind ssp { entry: %cmp = icmp sgt i32 %a, 0 @@ -224,7 +224,7 @@ ; CHECK: cbz_second ; CHECK: cmp w0, #0 ; CHECK: ccmp w1, #0, #0, ne -; CHECK: b.eq +; CHECK: b.ne define i32 @cbz_second(i32 %a, i32 %b) nounwind ssp { entry: %cmp = icmp eq i32 %a, 0 @@ -244,7 +244,7 @@ ; CHECK: cbnz_second ; CHECK: cmp w0, #0 ; CHECK: ccmp w1, #0, #4, ne -; CHECK: b.ne +; CHECK: b.eq define i32 @cbnz_second(i32 %a, i32 %b) nounwind ssp { entry: %cmp = icmp eq i32 %a, 0 Index: test/CodeGen/AArch64/arm64-shrink-wrapping.ll =================================================================== --- test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -10,8 +10,11 @@ ; Compare the arguments and jump to exit. ; No prologue needed. ; ENABLE: cmp w0, w1 -; ENABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]] +; ENABLE-NEXT: b.lt [[PROLOGUE_LABEL:LBB[0-9_]+]] +; ENABLE: ret ; + +; ENABLE: [[PROLOGUE_LABEL]] ; Prologue code. ; CHECK: sub sp, sp, #32 ; CHECK-NEXT: stp [[SAVE_SP:x[0-9]+]], [[CSR:x[0-9]+]], [sp, #16] @@ -37,7 +40,6 @@ ; CHECK-NEXT: add sp, sp, #32 ; ; With shrink-wrapping, exit block is a simple return. -; ENABLE: [[EXIT_LABEL]]: ; CHECK-NEXT: ret define i32 @foo(i32 %a, i32 %b) { %tmp = alloca i32, align 4 @@ -346,19 +348,15 @@ ; CHECK-NEXT: sub w1, w1, #1 ; CHECK-NEXT: add [[SUM]], [[SUM]], [[VA_VAL]] ; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]] -; DISABLE-NEXT: b [[IFEND_LABEL]] -; -; DISABLE: [[ELSE_LABEL]]: ; %if.else -; DISABLE: lsl w0, w1, #1 -; -; CHECK: [[IFEND_LABEL]]: +; CHECK-NEXT: [[IFEND_LABEL]]: ; Epilogue code. 
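; The if.else path now returns directly instead of branching to this epilogue.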
; CHECK: add sp, sp, #16 ; CHECK-NEXT: ret ; -; ENABLE: [[ELSE_LABEL]]: ; %if.else -; ENABLE-NEXT: lsl w0, w1, #1 -; ENABLE_NEXT: ret +; CHECK: [[ELSE_LABEL]]: ; %if.else +; CHECK-NEXT: lsl w0, w1, #1 +; DISABLE-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret define i32 @variadicFunc(i32 %cond, i32 %count, ...) #0 { entry: %ap = alloca i8*, align 8 Index: test/CodeGen/AArch64/branch-relax-bcc.ll =================================================================== --- test/CodeGen/AArch64/branch-relax-bcc.ll +++ test/CodeGen/AArch64/branch-relax-bcc.ll @@ -41,24 +41,22 @@ ; CHECK-LABEL: _block_split: ; CHECK: cmp w0, #5 -; CHECK-NEXT: b.eq [[LONG_BR_BB:LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: b [[LOR_LHS_FALSE_BB:LBB[0-9]+_[0-9]+]] - -; CHECK: [[LONG_BR_BB]]: +; CHECK-NEXT: b.ne [[LOR_LHS_FALSE_BB:LBB[0-9]+_[0-9]+]] ; CHECK-NEXT: b [[IF_THEN_BB:LBB[0-9]+_[0-9]+]] ; CHECK: [[LOR_LHS_FALSE_BB]]: ; CHECK: cmp w{{[0-9]+}}, #16 ; CHECK-NEXT: b.le [[IF_THEN_BB]] -; CHECK-NEXT: b [[IF_END_BB:LBB[0-9]+_[0-9]+]] + +; CHECK: ; %if.end +; CHECK: #0x7 +; CHECK: ret ; CHECK: [[IF_THEN_BB]]: ; CHECK: bl _foo ; CHECK-NOT: b L - -; CHECK: [[IF_END_BB]]: -; CHECK: #0x7 ; CHECK: ret + define i32 @block_split(i32 %a, i32 %b) #0 { entry: %cmp = icmp eq i32 %a, 5 Index: test/CodeGen/AArch64/compare-branch.ll =================================================================== --- test/CodeGen/AArch64/compare-branch.ll +++ test/CodeGen/AArch64/compare-branch.ll @@ -27,7 +27,7 @@ %val4 = load volatile i64, i64* @var64 %tst4 = icmp ne i64 %val4, 0 br i1 %tst4, label %end, label %test5, !prof !1 -; CHECK: cbnz {{x[0-9]+}}, .LBB +; CHECK: cbz {{x[0-9]+}}, .LBB test5: store volatile i64 %val4, i64* @var64 Index: test/CodeGen/AArch64/logical_shifted_reg.ll =================================================================== --- test/CodeGen/AArch64/logical_shifted_reg.ll +++ test/CodeGen/AArch64/logical_shifted_reg.ll @@ -210,7 +210,7 @@ test3: ; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}, asr #12 -; CHECK: b.gt .L +; CHECK: b.le .L %asr_op = ashr i64 %val2, 12 %asr_and = and i64 %asr_op, %val1 %tst3 = icmp sgt i64 %asr_and, 0 Index: test/CodeGen/AArch64/rm_redundant_cmp.ll =================================================================== --- test/CodeGen/AArch64/rm_redundant_cmp.ll +++ test/CodeGen/AArch64/rm_redundant_cmp.ll @@ -11,7 +11,7 @@ define void @test_i16_2cmp_signed_1() { ; CHECK-LABEL: test_i16_2cmp_signed_1 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.lt +; CHECK-NEXT: b.ge ; CHECK-NOT: cmp ; CHECK: ret entry: @@ -41,7 +41,7 @@ ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} ; CHECK-NEXT: b.gt ; CHECK-NOT: cmp -; CHECK: b.ge +; CHECK: b.lt entry: %0 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2 %1 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2 @@ -67,7 +67,7 @@ define void @test_i16_2cmp_unsigned_1() { ; CHECK-LABEL: test_i16_2cmp_unsigned_1 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.lo +; CHECK-NEXT: b.hs ; CHECK-NOT: cmp ; CHECK: ret entry: @@ -97,7 +97,7 @@ ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} ; CHECK-NEXT: b.hi ; CHECK-NOT: cmp -; CHECK: b.hs +; CHECK: b.lo entry: %0 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2 %1 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2 @@ -132,7 +132,7 @@ define void 
@test_i8_2cmp_signed_1() { ; CHECK-LABEL: test_i8_2cmp_signed_1 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.lt +; CHECK-NEXT: b.ge ; CHECK-NOT: cmp ; CHECK: ret entry: @@ -162,7 +162,7 @@ ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} ; CHECK-NEXT: b.gt ; CHECK-NOT: cmp -; CHECK: b.ge +; CHECK: b.lt entry: %0 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2 %1 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2 @@ -188,7 +188,7 @@ define void @test_i8_2cmp_unsigned_1() { ; CHECK-LABEL: test_i8_2cmp_unsigned_1 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.lo +; CHECK-NEXT: b.hs ; CHECK-NOT: cmp ; CHECK: ret entry: @@ -218,7 +218,7 @@ ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} ; CHECK-NEXT: b.hi ; CHECK-NOT: cmp -; CHECK: b.hs +; CHECK: b.lo entry: %0 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2 %1 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2 Index: test/CodeGen/AArch64/tail-dup-repeat-worklist.ll =================================================================== --- test/CodeGen/AArch64/tail-dup-repeat-worklist.ll +++ /dev/null @@ -1,69 +0,0 @@ -; RUN: llc -O3 -o - -verify-machineinstrs %s | FileCheck %s -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64-unknown-linux-gnu" - -%struct.s1 = type { %struct.s3*, %struct.s1* } -%struct.s2 = type opaque -%struct.s3 = type { i32 } - -; Function Attrs: nounwind -define internal fastcc i32 @repeated_dup_worklist(%struct.s1** %pp1, %struct.s2* %p2, i32 %state, i1 %i1_1, i32 %i32_1) unnamed_addr #0 { -entry: - br label %while.cond.outer - -; The loop gets laid out: -; %while.cond.outer -; %(null) -; %(null) -; %dup2 -; and then %dup1 gets chosen as the next block. -; when dup2 is duplicated into dup1, %worklist could erroneously be placed on -; the worklist, because all of its current predecessors are now scheduled. 
-; However, after dup2 is tail-duplicated, %worklist can't be on the worklist -; because it now has unscheduled predecessors.q -; CHECK-LABEL: repeated_dup_worklist -; CHECK: // %entry -; CHECK: // %while.cond.outer -; first %(null) block -; CHECK: // in Loop: -; CHECK: ldr -; CHECK-NEXT: tbnz -; second %(null) block -; CHECK: // in Loop: -; CHECK: // %dup2 -; CHECK: // %worklist -; CHECK: // %if.then96.i -while.cond.outer: ; preds = %dup1, %entry - %progress.0.ph = phi i32 [ 0, %entry ], [ %progress.1, %dup1 ] - %inc77 = add nsw i32 %progress.0.ph, 1 - %cmp = icmp slt i32 %progress.0.ph, %i32_1 - br i1 %cmp, label %dup2, label %dup1 - -dup2: ; preds = %if.then96.i, %worklist, %while.cond.outer - %progress.1.ph = phi i32 [ 0, %while.cond.outer ], [ %progress.1, %if.then96.i ], [ %progress.1, %worklist ] - %.pr = load %struct.s1*, %struct.s1** %pp1, align 8 - br label %dup1 - -dup1: ; preds = %dup2, %while.cond.outer - %0 = phi %struct.s1* [ %.pr, %dup2 ], [ undef, %while.cond.outer ] - %progress.1 = phi i32 [ %progress.1.ph, %dup2 ], [ %inc77, %while.cond.outer ] - br i1 %i1_1, label %while.cond.outer, label %worklist - -worklist: ; preds = %dup1 - %snode94 = getelementptr inbounds %struct.s1, %struct.s1* %0, i64 0, i32 0 - %1 = load %struct.s3*, %struct.s3** %snode94, align 8 - %2 = getelementptr inbounds %struct.s3, %struct.s3* %1, i32 0, i32 0 - %3 = load i32, i32* %2, align 4 - %tobool95.i = icmp eq i32 %3, 0 - br i1 %tobool95.i, label %if.then96.i, label %dup2 - -if.then96.i: ; preds = %worklist - call fastcc void @free_s3(%struct.s2* %p2, %struct.s3* %1) #1 - br label %dup2 -} - -; Function Attrs: nounwind -declare fastcc void @free_s3(%struct.s2*, %struct.s3*) unnamed_addr #0 - -attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+crc,+crypto,+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind } Index: test/CodeGen/AArch64/tbz-tbnz.ll =================================================================== --- test/CodeGen/AArch64/tbz-tbnz.ll +++ test/CodeGen/AArch64/tbz-tbnz.ll @@ -10,7 +10,7 @@ br i1 %cmp, label %if.then, label %if.end ; CHECK: sub [[CMP:w[0-9]+]], w0, #12 -; CHECK: tbz [[CMP]], #31 +; CHECK: tbnz [[CMP]], #31 if.then: call void @t() @@ -28,7 +28,7 @@ br i1 %cmp, label %if.then, label %if.end ; CHECK: sub [[CMP:x[0-9]+]], x0, #12 -; CHECK: tbz [[CMP]], #63 +; CHECK: tbnz [[CMP]], #63 if.then: call void @t() @@ -82,7 +82,7 @@ br i1 %cmp, label %if.then, label %if.end ; CHECK: sub [[CMP:w[0-9]+]], w0, #12 -; CHECK: tbnz [[CMP]], #31 +; CHECK: tbz [[CMP]], #31 if.then: call void @t() @@ -100,7 +100,7 @@ br i1 %cmp, label %if.then, label %if.end ; CHECK: sub [[CMP:x[0-9]+]], x0, #12 -; CHECK: tbnz [[CMP]], #63 +; CHECK: tbz [[CMP]], #63 if.then: call void @t() @@ -118,7 +118,7 @@ br i1 %cmp, label %if.then, label %if.end ; CHECK: sub [[CMP:w[0-9]+]], w0, #12 -; CHECK: tbz [[CMP]], #31 +; CHECK: tbnz [[CMP]], #31 if.then: call void @t() @@ -162,7 +162,7 @@ br i1 %tst4, label %if.then4, label %if.end ; CHECK: tst x0, x1, lsl #62 -; CHECK: b.lt +; CHECK: b.ge if.then4: call void @t() @@ -178,7 +178,7 @@ br i1 %tst, label %if.then, label %if.end ; CHECK-NOT: cmp -; CHECK: tbz x0, #63 +; CHECK: tbnz x0, #63 if.then: 
call void @t() @@ -194,7 +194,7 @@ br i1 %tst, label %if.then, label %if.end ; CHECK-NOT: cmp -; CHECK: tbz x0, #63 +; CHECK: tbnz x0, #63 if.then: call void @t() @@ -209,7 +209,7 @@ ; CHECK: ldr [[CMP:x[0-9]+]], [x1] ; CHECK-NOT: cmp -; CHECK: tbz [[CMP]], #63 +; CHECK: tbnz [[CMP]], #63 %val = load i64, i64* %ptr %tst = icmp slt i64 %val, 0 @@ -229,7 +229,7 @@ br i1 %tst, label %if.then, label %if.end ; CHECK-NOT: cmp -; CHECK: tbz x0, #63 +; CHECK: tbnz x0, #63 if.then: call void @t() @@ -247,7 +247,7 @@ ; CHECK: orr [[CMP:x[0-9]+]], x0, x1 ; CHECK-NOT: cmp -; CHECK: tbz [[CMP]], #63 +; CHECK: tbnz [[CMP]], #63 if.then: call void @t() @@ -262,7 +262,7 @@ br i1 %cond, label %if.end, label %if.then ; CHECK-NOT: and -; CHECK: tbnz w0, #0 +; CHECK: tbz w0, #0 if.then: call void @t() @@ -278,7 +278,7 @@ br i1 %cond1, label %if.then, label %if.end ; CHECK-NOT: movn -; CHECK: tbnz w0, #0 +; CHECK: tbz w0, #0 if.then: call void @t() @@ -296,7 +296,7 @@ br i1 %cond, label %then, label %end ; CHECK-NOT: lsl -; CHECK: tbnz w0, #2 +; CHECK: tbz w0, #2 then: call void @t() @@ -314,7 +314,7 @@ br i1 %cond, label %then, label %end ; CHECK-NOT: lsr -; CHECK: tbnz w0, #3 +; CHECK: tbz w0, #3 then: call void @t() @@ -331,7 +331,7 @@ br i1 %cond, label %then, label %end ; CHECK-NOT: asr -; CHECK: tbnz w0, #31 +; CHECK: tbz w0, #31 then: call void @t() @@ -350,7 +350,7 @@ br i1 %cond, label %then, label %end ; CHECK-NOT: ubfx -; CHECK: tbnz w0, #3 +; CHECK: tbz w0, #3 then: call void @t() Index: test/CodeGen/AMDGPU/basic-branch.ll =================================================================== --- test/CodeGen/AMDGPU/basic-branch.ll +++ test/CodeGen/AMDGPU/basic-branch.ll @@ -8,13 +8,10 @@ ; GCNNOOPT: v_writelane_b32 ; GCN: s_cbranch_scc1 [[END:BB[0-9]+_[0-9]+]] - -; GCN: ; BB#1 ; GCNNOOPT: v_readlane_b32 ; GCNNOOPT: v_readlane_b32 ; GCN: buffer_store_dword -; GCNOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; TODO: This waitcnt can be eliminated +; GCNNOOPT: s_endpgm ; GCN: {{^}}[[END]]: ; GCN: s_endpgm @@ -34,13 +31,14 @@ ; GCN: buffer_load_ubyte ; GCN: v_and_b32_e32 v{{[0-9]+}}, 1, ; GCN: v_cmp_eq_u32_e32 vcc, -; GCN: s_cbranch_vccnz [[END:BB[0-9]+_[0-9]+]] +; GCNNOOPT: s_cbranch_vccnz [[END:BB[0-9]+_[0-9]+]] +; GCNOPT: s_cbranch_vccz [[BODY:BB[0-9]+_[0-9]+]] +; GCNOPT: s_endpgm +; GCNOPT: {{^}}[[BODY]]: ; GCN: buffer_store_dword -; GCNOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; TODO: This waitcnt can be eliminated -; GCN: {{^}}[[END]]: +; GCNNOOPT: {{^}}[[END]]: ; GCN: s_endpgm define void @test_brcc_i1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i1 %val) #0 { %cmp0 = icmp ne i1 %val, 0 Index: test/CodeGen/AMDGPU/br_cc.f16.ll =================================================================== --- test/CodeGen/AMDGPU/br_cc.f16.ll +++ test/CodeGen/AMDGPU/br_cc.f16.ll @@ -47,18 +47,15 @@ ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] ; SI: v_cmp_ngt_f32_e32 vcc, v[[B_F32]], v[[A_F32]] -; SI: s_cbranch_vccz ; VI: v_cmp_nlt_f16_e32 vcc, 0.5, v[[B_F16]] -; VI: s_cbranch_vccnz - -; VI: one{{$}} -; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x380{{0|1}}{{$}} +; GCN: s_cbranch_vccz ; GCN: two{{$}} ; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]] -; SI: one{{$}} +; GCN: one{{$}} +; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x380{{0|1}}{{$}} ; SI: buffer_store_short v[[A_F16]] ; SI: s_endpgm Index: test/CodeGen/AMDGPU/branch-relaxation.ll =================================================================== --- test/CodeGen/AMDGPU/branch-relaxation.ll +++ test/CodeGen/AMDGPU/branch-relaxation.ll @@ 
-335,6 +335,12 @@ ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: [[BB3]]: ; %bb3 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: v_nop_e64 +; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: s_endpgm define void @expand_requires_expand(i32 %cond0) #0 { bb0: @@ -356,6 +362,12 @@ br label %bb3 bb3: +; These NOPs prevent tail-duplication-based outlining +; from firing, which defeats the need to expand the branches and this test. + call void asm sideeffect + "v_nop_e64", ""() #0 + call void asm sideeffect + "v_nop_e64", ""() #0 ret void } @@ -385,6 +397,7 @@ ; GCN-NEXT: [[ENDIF]]: ; %endif ; GCN-NEXT: s_or_b64 exec, exec, [[MASK]] +; GCN-NEXT: s_sleep 5 ; GCN-NEXT: s_endpgm define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) #0 { entry: @@ -402,6 +415,9 @@ br label %endif endif: + ; layout can remove the split branch if it can copy the return block. + ; This call makes the return block long enough that it doesn't get copied. + call void @llvm.amdgcn.s.sleep(i32 5); ret void } Index: test/CodeGen/AMDGPU/convergent-inlineasm.ll =================================================================== --- test/CodeGen/AMDGPU/convergent-inlineasm.ll +++ test/CodeGen/AMDGPU/convergent-inlineasm.ll @@ -26,9 +26,12 @@ ; GCN: ; mask branch ; GCN: BB{{[0-9]+_[0-9]+}}: -; GCN: v_cmp_ne_u32_e64 +; GCN: s_endpgm ; GCN: BB{{[0-9]+_[0-9]+}}: +; GCN: v_cmp_ne_u32_e64 + + define void @nonconvergent_inlineasm(i64 addrspace(1)* nocapture %arg) { bb: %tmp = call i32 @llvm.amdgcn.workitem.id.x() Index: test/CodeGen/AMDGPU/i1-copy-implicit-def.ll =================================================================== --- test/CodeGen/AMDGPU/i1-copy-implicit-def.ll +++ test/CodeGen/AMDGPU/i1-copy-implicit-def.ll @@ -4,7 +4,7 @@ ; SILowerI1Copies was not handling IMPLICIT_DEF ; SI-LABEL: {{^}}br_implicit_def: ; SI: BB#0: -; SI-NEXT: s_cbranch_scc1 +; SI-NEXT: s_cbranch_scc0 define void @br_implicit_def(i32 addrspace(1)* %out, i32 %arg) #0 { bb: br i1 undef, label %bb1, label %bb2 Index: test/CodeGen/AMDGPU/salu-to-valu.ll =================================================================== --- test/CodeGen/AMDGPU/salu-to-valu.ll +++ test/CodeGen/AMDGPU/salu-to-valu.ll @@ -435,11 +435,12 @@ ; GCN: s_load_dword [[SGPR:s[0-9]+]] ; GCN: v_cmp_le_u32_e32 vcc, [[SGPR]], v{{[0-9]+}} ; GCN: s_and_b64 vcc, exec, vcc -; GCN: s_cbranch_vccnz [[EXIT:[A-Z0-9_]+]] +; GCN: s_cbranch_vccz [[BODY:[A-Z0-9_]+]] +; GCN: s_endpgm +; GCN: {{^}}[[BODY]]: ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN-NOHSA: buffer_store_dword [[ONE]] ; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[ONE]] -; GCN; {{^}}[[EXIT]]: ; GCN: s_endpgm define void @sopc_vopc_legalize_bug(i32 %cond, i32 addrspace(1)* %out, i32 addrspace(1)* %in) { bb3: ; preds = %bb2 Index: test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll =================================================================== --- test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll +++ test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll @@ -37,7 +37,10 @@ ; OPT-NOT: call i1 @llvm.amdgcn.loop ; GCN-LABEL: {{^}}annotate_ret_noloop: -; GCN: s_cbranch_scc1 +; GCN: s_cbranch_scc0 [[BODY:BB[0-9]+_[0-9]+]] +; GCN: s_endpgm + +; GCN: {{^}}[[BODY]]: ; GCN: s_endpgm ; GCN: .Lfunc_end1 define void @annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 { Index: test/CodeGen/AMDGPU/uniform-cfg.ll =================================================================== --- test/CodeGen/AMDGPU/uniform-cfg.ll +++ test/CodeGen/AMDGPU/uniform-cfg.ll @@ -119,9 +119,10 @@ ; be 
selected for the SALU and then later moved to the VALU. ; GCN: v_cmp_ne_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]] ; GCN: s_and_b64 vcc, exec, [[COND]] -; GCN: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]] +; GCN: s_cbranch_vccz [[IF_LABEL:[0-9_A-Za-z]+]] +; GCN: s_endpgm +; GCN: [[IF_LABEL]]: ; GCN: buffer_store_dword -; GCN: [[ENDIF_LABEL]]: ; GCN: s_endpgm define void @uniform_if_move_valu(i32 addrspace(1)* %out, float %a) { entry: @@ -144,9 +145,10 @@ ; be selected for the SALU and then later moved to the VALU. ; GCN: v_cmp_gt_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 6, [[CMP]] ; GCN: s_and_b64 vcc, exec, [[COND]] -; GCN: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]] +; GCN: s_cbranch_vccz [[IF_LABEL:[0-9_A-Za-z]+]] +; GCN: s_endpgm +; GCN: [[IF_LABEL]]: ; GCN: buffer_store_dword -; GCN: [[ENDIF_LABEL]]: ; GCN: s_endpgm define void @uniform_if_move_valu_commute(i32 addrspace(1)* %out, float %a) { entry: @@ -252,10 +254,12 @@ ; GCN: s_cmp_lt_i32 [[COND]], 1 ; GCN: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]] ; GCN: v_cmp_gt_i32_e64 vcc, [[COND]], 0{{$}} -; GCN: s_cbranch_vccnz [[EXIT]] -; GCN: buffer_store +; GCN: s_cbranch_vccz [[BODY:[A-Za-z0-9_]+]] ; GCN: {{^}}[[EXIT]]: ; GCN: s_endpgm +; GCN: {{^}}[[BODY]]: +; GCN: buffer_store +; GCN: s_endpgm define void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, i32 addrspace(1)* %out) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0 @@ -302,9 +306,10 @@ ; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}} ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc ; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]] -; GCN: s_cbranch_execz [[ENDIF_LABEL:[0-9_A-Za-z]+]] ; GCN: s_cmp_lg_u32 {{s[0-9]+}}, 0 -; GCN: s_cbranch_scc1 [[ENDIF_LABEL]] +; GCN: s_cbranch_scc0 [[IF_UNIFORM_LABEL:[A-Z0-9_a-z]+]] +; GCN: s_endpgm +; GCN: {{^}}[[IF_UNIFORM_LABEL]]: ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN: buffer_store_dword [[ONE]] define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) { @@ -328,14 +333,13 @@ ; GCN-LABEL: {{^}}divergent_inside_uniform: ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0 -; GCN: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]] +; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]] +; GCN: [[IF_LABEL]]: ; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}} ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc ; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]] ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN: buffer_store_dword [[ONE]] -; GCN: [[ENDIF_LABEL]]: -; GCN: s_endpgm define void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) { entry: %u_cmp = icmp eq i32 %cond, 0 @@ -363,11 +367,11 @@ ; GCN: buffer_store_dword [[ONE]] ; GCN: s_or_b64 exec, exec, [[MASK]] ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0 -; GCN: s_cbranch_scc1 [[EXIT:[A-Z0-9_]+]] +; GCN: s_cbranch_scc0 [[IF_UNIFORM:[A-Z0-9_]+]] +; GCN: s_endpgm +; GCN: [[IF_UNIFORM]]: ; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; GCN: buffer_store_dword [[TWO]] -; GCN: [[EXIT]]: -; GCN: s_endpgm define void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 @@ -398,16 +402,20 @@ ; GCN-LABEL: {{^}}cse_uniform_condition_different_blocks: ; GCN: s_load_dword [[COND:s[0-9]+]] ; GCN: s_cmp_lt_i32 [[COND]], 1 -; GCN: s_cbranch_scc1 BB[[FNNUM:[0-9]+]]_3 +; GCN: s_cbranch_scc1 [[FN:BB[0-9_]+]] ; GCN: BB#1: ; GCN-NOT: cmp ; GCN: buffer_load_dword ; GCN: buffer_store_dword -; GCN: s_cbranch_scc1 BB[[FNNUM]]_3 +; GCN: s_cbranch_scc0 [[BB7:BB[0-9_]+]] -; GCN: BB[[FNNUM]]_3: +; GCN: [[FN]]: ; GCN: 
s_endpgm + +; GCN: [[BB7]]: +; GCN: s_endpgm + define void @cse_uniform_condition_different_blocks(i32 %cond, i32 addrspace(1)* %out) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0 Index: test/CodeGen/AMDGPU/valu-i1.ll =================================================================== --- test/CodeGen/AMDGPU/valu-i1.ll +++ test/CodeGen/AMDGPU/valu-i1.ll @@ -9,8 +9,8 @@ ; waitcnt should be inserted after exec modification ; SI: v_cmp_lt_i32_e32 vcc, 0, -; SI-NEXT: s_and_saveexec_b64 [[SAVE1:s\[[0-9]+:[0-9]+\]]], vcc -; SI-NEXT: s_xor_b64 [[SAVE2:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE1]] +; SI-NEXT: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], vcc +; SI-NEXT: s_xor_b64 [[SAVE2:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE]] ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: ; mask branch [[FLOW_BB:BB[0-9]+_[0-9]+]] ; SI-NEXT: s_cbranch_execz [[FLOW_BB]] @@ -24,9 +24,9 @@ ; v_mov should be after exec modification ; SI: [[FLOW_BB]]: -; SI-NEXT: s_or_saveexec_b64 [[SAVE3:s\[[0-9]+:[0-9]+\]]], [[SAVE2]] +; SI-NEXT: s_or_saveexec_b64 [[SAVE]], [[SAVE2]] ; SI-NEXT: v_mov_b32_e32 v{{[0-9]+}} -; SI-NEXT: s_xor_b64 exec, exec, [[SAVE3]] +; SI-NEXT: s_xor_b64 exec, exec, [[SAVE]] ; SI-NEXT: ; mask branch ; define void @test_if(i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 { @@ -137,9 +137,13 @@ ; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]], vcc ; SI: s_xor_b64 [[OUTER_CMP_SREG]], exec, [[OUTER_CMP_SREG]] ; SI: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]] +; SI: s_branch [[LABEL_PREHEADER:BB[0-9]+_[0-9]+]] + +; SI: [[LABEL_EXIT]]: +; SI: s_endpgm ; Initialize inner condition to false -; SI: BB{{[0-9]+_[0-9]+}}: ; %bb10.preheader +; SI: [[LABEL_PREHEADER]]: ; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0{{$}} ; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], [[ZERO]] @@ -166,10 +170,8 @@ ; SI-NEXT: s_andn2_b64 exec, exec, [[COND_STATE]] ; SI-NEXT: s_cbranch_execnz [[LABEL_LOOP]] -; SI: BB#5 +; SI: BB#6 ; SI: s_or_b64 exec, exec, [[COND_STATE]] - -; SI: [[LABEL_EXIT]]: ; SI-NOT: [[COND_STATE]] ; SI: s_endpgm Index: test/CodeGen/ARM/arm-and-tst-peephole.ll =================================================================== --- test/CodeGen/ARM/arm-and-tst-peephole.ll +++ test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -49,9 +49,9 @@ ; V8-NEXT: beq ; V8-NEXT: %tailrecurse.switch ; V8: cmp -; V8-NEXT: bne -; V8-NEXT: b -; The trailing space in the last line checks that the branch is unconditional +; V8-NEXT: beq +; V8-NEXT: %sw.epilog +; V8-NEXT: bx lr switch i32 %and, label %sw.epilog [ i32 1, label %sw.bb i32 3, label %sw.bb6 Index: test/CodeGen/ARM/arm-shrink-wrapping.ll =================================================================== --- test/CodeGen/ARM/arm-shrink-wrapping.ll +++ test/CodeGen/ARM/arm-shrink-wrapping.ll @@ -23,9 +23,11 @@ ; Compare the arguments and jump to exit. ; No prologue needed. ; ENABLE: cmp r0, r1 -; ENABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]] +; ENABLE-NEXT: blt [[BODY_LABEL:LBB[0-9_]+]] +; ENABLE: bx lr ; ; Prologue code. +; ENABLE: [[BODY_LABEL]]: ; CHECK: push {r7, lr} ; CHECK-NEXT: mov r7, sp ;; @@ -33,7 +35,8 @@ ; After the prologue is set. ; DISABLE: sub sp ; DISABLE: cmp r0, r1 -; DISABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]] +; DISABLE-NEXT: blt [[BODY_LABEL:LBB[0-9_]+]] +; DISABLE: pop {r7, pc} ; ; Store %a in the alloca. ; ARM-ENABLE: push {r0} @@ -50,8 +53,6 @@ ; THUMB-ENABLE-NEXT: add sp, #4 ; ENABLE-NEXT: pop{{(\.w)?}} {r7, lr} ; -; CHECK: [[EXIT_LABEL]]: -; ; Without shrink-wrapping, epilogue is in the exit block. ; Epilogue code. 
(What we pop does not matter.) ; ARM-DISABLE: mov sp, r7 Index: test/CodeGen/ARM/atomic-cmpxchg.ll =================================================================== --- test/CodeGen/ARM/atomic-cmpxchg.ll +++ test/CodeGen/ARM/atomic-cmpxchg.ll @@ -54,9 +54,10 @@ ; CHECK-THUMBV6-NEXT: movs r0, #1 ; CHECK-THUMBV6-NEXT: movs [[ZERO:r[0-9]+]], #0 ; CHECK-THUMBV6-NEXT: cmp [[RES]], [[EXPECTED]] -; CHECK-THUMBV6-NEXT: beq [[END:.LBB[0-9_]+]] +; CHECK-THUMBV6-NEXT: bne [[NONEQ:.LBB[0-9_]+]] +; CHECK-THUMBV6-NEXT: pop {{.*}}pc} +; CHECK-THUMBV6-NEXT: [[NONEQ]]: ; CHECK-THUMBV6-NEXT: mov r0, [[ZERO]] -; CHECK-THUMBV6-NEXT: [[END]]: ; CHECK-THUMBV6-NEXT: pop {{.*}}pc} ; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8: @@ -66,14 +67,14 @@ ; CHECK-ARMV7-NEXT: [[HEAD:.LBB[0-9_]+]]: ; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0] ; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0 -; CHECK-ARMV7-NEXT: moveq [[RES:r[0-9]+]], #1 +; CHECK-ARMV7-NEXT: moveq r0, #1 ; CHECK-ARMV7-NEXT: bxeq lr ; CHECK-ARMV7-NEXT: [[TRY]]: -; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0] -; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]] +; CHECK-ARMV7-NEXT: ldrexb [[SUCCESS]], [r0] +; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], r1 ; CHECK-ARMV7-NEXT: beq [[HEAD]] ; CHECK-ARMV7-NEXT: clrex -; CHECK-ARMV7-NEXT: mov [[RES]], #0 +; CHECK-ARMV7-NEXT: mov r0, #0 ; CHECK-ARMV7-NEXT: bx lr ; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8: Index: test/CodeGen/ARM/atomic-op.ll =================================================================== --- test/CodeGen/ARM/atomic-op.ll +++ test/CodeGen/ARM/atomic-op.ll @@ -320,10 +320,10 @@ ; CHECK: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]] ; CHECK: cmp [[SUCCESS]], #0 ; CHECK: bne [[LOOP_BB]] -; CHECK: b [[END_BB:\.?LBB[0-9]+_[0-9]+]] +; CHECK: dmb ish +; CHECK: bx lr ; CHECK: [[FAIL_BB]]: ; CHECK-NEXT: clrex -; CHECK-NEXT: [[END_BB]]: ; CHECK: dmb ish ; CHECK: bx lr Index: test/CodeGen/ARM/atomic-ops-v8.ll =================================================================== --- test/CodeGen/ARM/atomic-ops-v8.ll +++ test/CodeGen/ARM/atomic-ops-v8.ll @@ -1045,20 +1045,21 @@ ; function there. ; CHECK-ARM-NEXT: cmp r[[OLD]], r0 ; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]] -; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 ; CHECK-NEXT: BB#2: ; As above, r1 is a reasonable guess. ; CHECK: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NEXT: b .LBB{{[0-9]+}}_4 -; CHECK-NEXT: .LBB{{[0-9]+}}_3: -; CHECK-NEXT: clrex +; CHECK-ARM: mov r0, r[[OLD]] +; CHECK: bx lr ; CHECK-NEXT: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: clrex ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK-ARM: mov r0, r[[OLD]] +; CHECK-ARM-NEXT: bx lr ret i8 %old } @@ -1078,20 +1079,21 @@ ; function there. ; CHECK-ARM-NEXT: cmp r[[OLD]], r0 ; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]] -; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 ; CHECK-NEXT: BB#2: ; As above, r1 is a reasonable guess. ; CHECK: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NEXT: b .LBB{{[0-9]+}}_4 -; CHECK-NEXT: .LBB{{[0-9]+}}_3: -; CHECK-NEXT: clrex +; CHECK-ARM: mov r0, r[[OLD]] +; CHECK: bx lr ; CHECK-NEXT: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: clrex ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK-ARM: mov r0, r[[OLD]] +; CHECK-ARM-NEXT: bx lr ret i16 %old } @@ -1110,20 +1112,21 @@ ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
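; The clrex failure block is now laid out after the success path's return,
; which is why the taken-branch target below changed from _3 to _4.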
; CHECK-NEXT: cmp r[[OLD]], r0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 ; CHECK-NEXT: BB#2: ; As above, r1 is a reasonable guess. ; CHECK: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NEXT: b .LBB{{[0-9]+}}_4 -; CHECK-NEXT: .LBB{{[0-9]+}}_3: -; CHECK-NEXT: clrex +; CHECK: str{{(.w)?}} r[[OLD]], +; CHECK-NEXT: bx lr ; CHECK-NEXT: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: clrex ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK: str{{(.w)?}} r[[OLD]], +; CHECK-ARM-NEXT: bx lr ret void } @@ -1148,16 +1151,16 @@ ; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0 ; CHECK-ARM-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]] ; CHECK-THUMB-BE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_LO]], [[MISMATCH_HI]] -; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 ; CHECK-NEXT: BB#2: ; As above, r2, r3 is a reasonable guess. ; CHECK: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NEXT: b .LBB{{[0-9]+}}_4 -; CHECK-NEXT: .LBB{{[0-9]+}}_3: -; CHECK-NEXT: clrex +; CHECK: strd [[OLD1]], [[OLD2]], [r[[ADDR]]] +; CHECK-NEXT: pop ; CHECK-NEXT: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: clrex ; CHECK-NOT: dmb ; CHECK-NOT: mcr Index: test/CodeGen/ARM/cmpxchg-weak.ll =================================================================== --- test/CodeGen/ARM/cmpxchg-weak.ll +++ test/CodeGen/ARM/cmpxchg-weak.ll @@ -13,14 +13,16 @@ ; CHECK-NEXT: dmb ish ; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r2, [r0] ; CHECK-NEXT: cmp [[SUCCESS]], #0 -; CHECK-NEXT: bne [[FAILBB:LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: beq [[SUCCESSBB:LBB[0-9]+_[0-9]+]] ; CHECK-NEXT: BB#2: -; CHECK-NEXT: dmb ish ; CHECK-NEXT: str r3, [r0] ; CHECK-NEXT: bx lr ; CHECK-NEXT: [[LDFAILBB]]: ; CHECK-NEXT: clrex -; CHECK-NEXT: [[FAILBB]]: +; CHECK-NEXT: str r3, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: [[SUCCESSBB]]: +; CHECK-NEXT: dmb ish ; CHECK-NEXT: str r3, [r0] ; CHECK-NEXT: bx lr Index: test/CodeGen/ARM/machine-cse-cmp.ll =================================================================== --- test/CodeGen/ARM/machine-cse-cmp.ll +++ test/CodeGen/ARM/machine-cse-cmp.ll @@ -52,7 +52,7 @@ ; CHECK-LABEL: f3: ; CHECK-NOT: sub ; CHECK: cmp -; CHECK: blt +; CHECK: bge %0 = load i32, i32* %offset, align 4 %cmp = icmp slt i32 %0, %size %s = sub nsw i32 %0, %size Index: test/CodeGen/Mips/brconeq.ll =================================================================== --- test/CodeGen/Mips/brconeq.ll +++ test/CodeGen/Mips/brconeq.ll @@ -8,11 +8,11 @@ entry: %0 = load i32, i32* @i, align 4 %1 = load i32, i32* @j, align 4 - %cmp = icmp eq i32 %0, %1 + %cmp = icmp ne i32 %0, %1 ; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} -; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]] +; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] ; 16: $[[LABEL]]: - br i1 %cmp, label %if.end, label %if.then + br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %entry store i32 1, i32* @result, align 4 Index: test/CodeGen/Mips/brconeqk.ll =================================================================== --- test/CodeGen/Mips/brconeqk.ll +++ test/CodeGen/Mips/brconeqk.ll @@ -6,10 +6,10 @@ define void @test() nounwind { entry: %0 = load i32, i32* @i, align 4 - %cmp = icmp eq i32 %0, 10 - br i1 %cmp, label %if.end, label %if.then + %cmp = icmp ne i32 %0, 10 + br i1 %cmp, label %if.then, label %if.end ; 16: cmpi ${{[0-9]+}}, {{[0-9]+}} -; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]] +; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] ; 16: 
$[[LABEL]]: if.then: ; preds = %entry store i32 1, i32* @result, align 4 Index: test/CodeGen/Mips/brcongt.ll =================================================================== --- test/CodeGen/Mips/brcongt.ll +++ test/CodeGen/Mips/brcongt.ll @@ -9,10 +9,10 @@ entry: %0 = load i32, i32* @i, align 4 %1 = load i32, i32* @j, align 4 - %cmp = icmp sgt i32 %0, %1 - br i1 %cmp, label %if.end, label %if.then + %cmp = icmp sle i32 %0, %1 + br i1 %cmp, label %if.then, label %if.end ; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] +; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]] ; 16: $[[LABEL]]: if.then: ; preds = %entry store i32 1, i32* @result, align 4 Index: test/CodeGen/Mips/brconlt.ll =================================================================== --- test/CodeGen/Mips/brconlt.ll +++ test/CodeGen/Mips/brconlt.ll @@ -10,12 +10,12 @@ entry: %0 = load i32, i32* @j, align 4 %1 = load i32, i32* @i, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %if.end, label %if.then + %cmp = icmp sge i32 %0, %1 + br i1 %cmp, label %if.then, label %if.end ; 16: slt ${{[0-9]+}}, ${{[0-9]+}} ; MM32R6: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] +; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]] ; 16: $[[LABEL]]: if.then: ; preds = %entry Index: test/CodeGen/Mips/brconne.ll =================================================================== --- test/CodeGen/Mips/brconne.ll +++ test/CodeGen/Mips/brconne.ll @@ -11,9 +11,9 @@ %cmp = icmp eq i32 %0, %1 br i1 %cmp, label %if.then, label %if.end ; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] -; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}}) +; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]] ; 16: $[[LABEL]]: +; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}}) if.then: ; preds = %entry store i32 1, i32* @result, align 4 Index: test/CodeGen/Mips/brconnek.ll =================================================================== --- test/CodeGen/Mips/brconnek.ll +++ test/CodeGen/Mips/brconnek.ll @@ -10,9 +10,9 @@ br i1 %cmp, label %if.then, label %if.end ; 16: cmpi ${{[0-9]+}}, {{[0-9]+}} -; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] -; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}}) +; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]] ; 16: $[[LABEL]]: +; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}}) if.then: ; preds = %entry store i32 1, i32* @result, align 4 Index: test/CodeGen/Mips/brconnez.ll =================================================================== --- test/CodeGen/Mips/brconnez.ll +++ test/CodeGen/Mips/brconnez.ll @@ -7,7 +7,7 @@ entry: %0 = load i32, i32* @j, align 4 %cmp = icmp eq i32 %0, 0 - br i1 %cmp, label %if.then, label %if.end + br i1 %cmp, label %if.then, label %if.end, !prof !1 ; 16: bnez ${{[0-9]+}}, $[[LABEL:[0-9A-Ba-b_]+]] ; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}}) @@ -21,4 +21,4 @@ ret void } - +!1 = !{!"branch_weights", i32 2, i32 1} Index: test/CodeGen/Mips/fcmp.ll =================================================================== --- test/CodeGen/Mips/fcmp.ll +++ test/CodeGen/Mips/fcmp.ll @@ -1065,7 +1065,7 @@ ; 32-C-DAG: add.s $[[T0:f[0-9]+]], $f14, $f12 ; 32-C-DAG: lwc1 $[[T1:f[0-9]+]], %lo($CPI32_0)( ; 32-C-DAG: c.ole.s $[[T0]], $[[T1]] -; 32-C-DAG: bc1t +; 32-C-DAG: bc1f ; 32-CMP-DAG: add.s $[[T0:f[0-9]+]], $f14, $f12 ; 32-CMP-DAG: lwc1 $[[T1:f[0-9]+]], %lo($CPI32_0)( @@ -1073,12 +1073,12 @@ ; 32-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]] ; FIXME: This instruction is redundant. 
; 32-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1
-; 32-CMP-DAG: bnezc $[[T4]],
+; 32-CMP-DAG: beqzc $[[T4]],
; 64-C-DAG: add.s $[[T0:f[0-9]+]], $f13, $f12
; 64-C-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst(.LCPI32_0)(
; 64-C-DAG: c.ole.s $[[T0]], $[[T1]]
-; 64-C-DAG: bc1t
+; 64-C-DAG: bc1f
; 64-CMP-DAG: add.s $[[T0:f[0-9]+]], $f13, $f12
; 64-CMP-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst(.LCPI32_0)(
@@ -1086,13 +1086,13 @@
; 64-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]]
; FIXME: This instruction is redundant.
; 64-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1
-; 64-CMP-DAG: bnezc $[[T4]],
+; 64-CMP-DAG: beqzc $[[T4]],
; MM32R3-DAG: add.s $[[T0:f[0-9]+]], $f14, $f12
; MM32R3-DAG: lui $[[T1:[0-9]+]], %hi($CPI32_0)
; MM32R3-DAG: lwc1 $[[T2:f[0-9]+]], %lo($CPI32_0)($[[T1]])
; MM32R3-DAG: c.ole.s $[[T0]], $[[T2]]
-; MM32R3-DAG: bc1t
+; MM32R3-DAG: bc1f
; MM32R6-DAG: add.s $[[T0:f[0-9]+]], $f14, $f12
; MM32R6-DAG: lui $[[T1:[0-9]+]], %hi($CPI32_0)
@@ -1100,7 +1100,7 @@
; MM32R6-DAG: cmp.le.s $[[T3:f[0-9]+]], $[[T0]], $[[T2]]
; MM32R6-DAG: mfc1 $[[T4:[0-9]+]], $[[T3:f[0-9]+]]
; MM32R6-DAG: andi16 $[[T5:[0-9]+]], $[[T4]], 1
-; MM32R6-DAG: bnez $[[T5]],
+; MM32R6-DAG: beqz $[[T5]],
; MM64R6-DAG: lui $[[T0:[0-9]+]], %hi(%neg(%gp_rel(bug1_f32)))
; MM64R6-DAG: daddu $[[T1:[0-9]+]], $[[T0]], $25
@@ -1111,7 +1111,7 @@
; MM64R6-DAG: cmp.le.s $[[T6:f[0-9]+]], $[[T3]], $[[T5]]
; MM64R6-DAG: mfc1 $[[T7:[0-9]+]], $[[T6]]
; MM64R6-DAG: andi16 $[[T8:[0-9]+]], $[[T7]], 1
-; MM64R6-DAG: bnez $[[T8]],
+; MM64R6-DAG: beqz $[[T8]],
%add = fadd fast float %at, %angle
%cmp = fcmp ogt float %add, 1.000000e+00
@@ -1134,7 +1134,7 @@
; 32-C-DAG: add.d $[[T0:f[0-9]+]], $f14, $f12
; 32-C-DAG: ldc1 $[[T1:f[0-9]+]], %lo($CPI33_0)(
; 32-C-DAG: c.ole.d $[[T0]], $[[T1]]
-; 32-C-DAG: bc1t
+; 32-C-DAG: bc1f
; 32-CMP-DAG: add.d $[[T0:f[0-9]+]], $f14, $f12
; 32-CMP-DAG: ldc1 $[[T1:f[0-9]+]], %lo($CPI33_0)(
@@ -1142,12 +1142,12 @@
; 32-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]]
; FIXME: This instruction is redundant.
; 32-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1
-; 32-CMP-DAG: bnezc $[[T4]],
+; 32-CMP-DAG: beqzc $[[T4]],
; 64-C-DAG: add.d $[[T0:f[0-9]+]], $f13, $f12
; 64-C-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst(.LCPI33_0)(
; 64-C-DAG: c.ole.d $[[T0]], $[[T1]]
-; 64-C-DAG: bc1t
+; 64-C-DAG: bc1f
; 64-CMP-DAG: add.d $[[T0:f[0-9]+]], $f13, $f12
; 64-CMP-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst(.LCPI33_0)(
@@ -1155,13 +1155,13 @@
; 64-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]]
; FIXME: This instruction is redundant.
; 64-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1
-; 64-CMP-DAG: bnezc $[[T4]],
+; 64-CMP-DAG: beqzc $[[T4]],
; MM32R3-DAG: add.d $[[T0:f[0-9]+]], $f14, $f12
; MM32R3-DAG: lui $[[T1:[0-9]+]], %hi($CPI33_0)
; MM32R3-DAG: ldc1 $[[T2:f[0-9]+]], %lo($CPI33_0)($[[T1]])
; MM32R3-DAG: c.ole.d $[[T0]], $[[T2]]
-; MM32R3-DAG: bc1t
+; MM32R3-DAG: bc1f
; MM32R6-DAG: add.d $[[T0:f[0-9]+]], $f14, $f12
; MM32R6-DAG: lui $[[T1:[0-9]+]], %hi($CPI33_0)
@@ -1169,7 +1169,7 @@
; MM32R6-DAG: cmp.le.d $[[T3:f[0-9]+]], $[[T0]], $[[T2]]
; MM32R6-DAG: mfc1 $[[T4:[0-9]+]], $[[T3]]
; MM32R6-DAG: andi16 $[[T5:[0-9]+]], $[[T4]], 1
-; MM32R6-DAG: bnez $[[T5]],
+; MM32R6-DAG: beqz $[[T5]],
; MM64R6-DAG: lui $[[T0:[0-9]+]], %hi(%neg(%gp_rel(bug1_f64)))
; MM64R6-DAG: daddu $[[T1:[0-9]+]], $[[T0]], $25
@@ -1180,7 +1180,7 @@
; MM64R6-DAG: cmp.le.d $[[T6:f[0-9]+]], $[[T3]], $[[T5]]
; MM64R6-DAG: mfc1 $[[T7:[0-9]+]], $[[T6]]
; MM64R6-DAG: andi16 $[[T8:[0-9]+]], $[[T7]], 1
-; MM64R6-DAG: bnez $[[T8]],
+; MM64R6-DAG: beqz $[[T8]],
%add = fadd fast double %at, %angle
%cmp = fcmp ogt double %add, 1.000000e+00
Index: test/CodeGen/Mips/llvm-ir/ashr.ll
===================================================================
--- test/CodeGen/Mips/llvm-ir/ashr.ll
+++ test/CodeGen/Mips/llvm-ir/ashr.ll
@@ -91,12 +91,13 @@
; M2: sllv $[[T5:[0-9]+]], $[[T4]], $[[T3]]
; M2: or $3, $[[T3]], $[[T2]]
; M2: $[[BB0]]:
- ; M2: beqz $[[T1]], $[[BB1:BB[0-9_]+]]
+ ; M2: bnez $[[T1]], $[[BB1:BB[0-9_]+]]
; M2: nop
- ; M2: sra $2, $4, 31
- ; M2: $[[BB1]]:
; M2: jr $ra
; M2: nop
+ ; M2: $[[BB1]]:
+ ; M2: jr $ra
+ ; M2: sra $2, $4, 31
; 32R1-R5: srlv $[[T0:[0-9]+]], $5, $7
; 32R1-R5: not $[[T1:[0-9]+]], $7
@@ -177,12 +178,13 @@
; M3: dsllv $[[T7:[0-9]+]], $[[T5]], $[[T6]]
; M3: or $3, $[[T7]], $[[T4]]
; M3: [[BB0]]:
- ; M3: beqz $[[T3]], [[BB1:.LBB[0-9_]+]]
+ ; M3: bnez $[[T3]], [[BB1:.LBB[0-9_]+]]
; M3: nop
- ; M3: dsra $2, $4, 63
- ; M3: [[BB1]]:
; M3: jr $ra
; M3: nop
+ ; M3: [[BB1]]:
+ ; M3: jr $ra
+ ; M3: dsra $2, $4, 63
; GP64-NOT-R6: dsrlv $[[T0:[0-9]+]], $5, $7
; GP64-NOT-R6: dsll $[[T1:[0-9]+]], $4, 1
Index: test/CodeGen/Mips/llvm-ir/lshr.ll
===================================================================
--- test/CodeGen/Mips/llvm-ir/lshr.ll
+++ test/CodeGen/Mips/llvm-ir/lshr.ll
@@ -89,12 +89,13 @@
; M2: sllv $[[T5:[0-9]+]], $[[T4]], $[[T3]]
; M2: or $3, $[[T3]], $[[T2]]
; M2: $[[BB0]]:
- ; M2: bnez $[[T1]], $[[BB1:BB[0-9_]+]]
+ ; M2: beqz $[[T1]], $[[BB1:BB[0-9_]+]]
; M2: addiu $2, $zero, 0
- ; M2: move $2, $[[T0]]
- ; M2: $[[BB1]]:
; M2: jr $ra
; M2: nop
+ ; M2: $[[BB1]]:
+ ; M2: jr $ra
+ ; M2: move $2, $[[T0]]
; 32R1-R5: srlv $[[T0:[0-9]+]], $5, $7
; 32R1-R5: not $[[T1:[0-9]+]], $7
@@ -168,12 +169,13 @@
; M3: dsllv $[[T7:[0-9]+]], $[[T5]], $[[T6]]
; M3: or $3, $[[T7]], $[[T4]]
; M3: [[BB0]]:
- ; M3: bnez $[[T3]], [[BB1:\.LBB[0-9_]+]]
+ ; M3: beqz $[[T3]], [[BB1:\.LBB[0-9_]+]]
; M3: daddiu $2, $zero, 0
- ; M3: move $2, $[[T1]]
- ; M3: [[BB1]]:
; M3: jr $ra
; M3: nop
+ ; M3: [[BB1]]:
+ ; M3: jr $ra
+ ; M3: move $2, $[[T1]]
; GP64-NOT-R6: dsrlv $[[T0:[0-9]+]], $5, $7
; GP64-NOT-R6: dsll $[[T1:[0-9]+]], $4, 1
Index: test/CodeGen/Mips/llvm-ir/select-dbl.ll
===================================================================
--- test/CodeGen/Mips/llvm-ir/select-dbl.ll
+++ test/CodeGen/Mips/llvm-ir/select-dbl.ll
@@ -58,10 +58,12 @@
; SEL-32: sel.d $f0, $[[F1]], $[[F0]]
; M3: andi $[[T0:[0-9]+]], $4, 1
- ; M3: bnez $[[T0]], [[BB0:.LBB[0-9_]+]]
+ ; M3: beqz $[[T0]], [[BB0:.LBB[0-9_]+]]
; M3: nop
- ; M3: mov.d $f13, $f14
+ ; M3: jr $ra
+ ; M3: mov.d $f0, $f13
; M3: [[BB0]]:
+ ; M3: mov.d $f13, $f14
; M3: jr $ra
; M3: mov.d $f0, $f13
@@ -89,10 +91,12 @@
; M2: lw $[[T0:[0-9]+]], 16($sp)
; M2: andi $[[T1:[0-9]+]], $[[T0]], 1
- ; M2: bnez $[[T1]], $[[BB0:BB[0-9_]+]]
+ ; M2: beqz $[[T1]], $[[BB0:BB[0-9_]+]]
; M2: nop
- ; M2: mov.d $f12, $f14
+ ; M2: jr $ra
+ ; M2: mov.d $f0, $f12
; M2: $[[BB0]]:
+ ; M2: mov.d $f12, $f14
; M2: jr $ra
; M2: mov.d $f0, $f12
@@ -106,10 +110,12 @@
; SEL-32: sel.d $f0, $f14, $f12
; M3: andi $[[T0:[0-9]+]], $6, 1
- ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]]
+ ; M3: beqz $[[T0]], [[BB0:\.LBB[0-9_]+]]
; M3: nop
- ; M3: mov.d $f12, $f13
+ ; M3: jr $ra
+ ; M3: mov.d $f0, $f12
; M3: [[BB0]]:
+ ; M3: mov.d $f12, $f13
; M3: jr $ra
; M3: mov.d $f0, $f12
@@ -135,12 +141,14 @@
; M2: c.olt.d $f12, $f14
; M3: c.olt.d $f12, $f13
- ; M2: bc1t [[BB0:\$BB[0-9_]+]]
- ; M3: bc1t [[BB0:\.LBB[0-9_]+]]
+ ; M2: bc1f [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1f [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
+ ; M2-M3: jr $ra
+ ; M2-M3: mov.d $f0, $f12
+ ; M2-M3: [[BB0]]:
; M2: mov.d $f12, $f14
; M3: mov.d $f12, $f13
- ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.d $f0, $f12
@@ -173,12 +181,14 @@
; M2: c.ole.d $f12, $f14
; M3: c.ole.d $f12, $f13
- ; M2: bc1t [[BB0:\$BB[0-9_]+]]
- ; M3: bc1t [[BB0:\.LBB[0-9_]+]]
+ ; M2: bc1f [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1f [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
+ ; M2-M3: jr $ra
+ ; M2-M3: mov.d $f0, $f12
+ ; M2-M3: [[BB0]]:
; M2: mov.d $f12, $f14
; M3: mov.d $f12, $f13
- ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.d $f0, $f12
@@ -211,12 +221,14 @@
; M2: c.ule.d $f12, $f14
; M3: c.ule.d $f12, $f13
- ; M2: bc1f [[BB0:\$BB[0-9_]+]]
- ; M3: bc1f [[BB0:\.LBB[0-9_]+]]
+ ; M2: bc1t [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1t [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
+ ; M2-M3: jr $ra
+ ; M2-M3: mov.d $f0, $f12
+ ; M2-M3: [[BB0]]:
; M2: mov.d $f12, $f14
; M3: mov.d $f12, $f13
- ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.d $f0, $f12
@@ -249,12 +261,14 @@
; M2: c.ult.d $f12, $f14
; M3: c.ult.d $f12, $f13
- ; M2: bc1f [[BB0:\$BB[0-9_]+]]
- ; M3: bc1f [[BB0:\.LBB[0-9_]+]]
+ ; M2: bc1t [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1t [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
+ ; M2-M3: jr $ra
+ ; M2-M3: mov.d $f0, $f12
+ ; M2-M3: [[BB0]]:
; M2: mov.d $f12, $f14
; M3: mov.d $f12, $f13
- ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.d $f0, $f12
@@ -287,12 +301,14 @@
; M2: c.eq.d $f12, $f14
; M3: c.eq.d $f12, $f13
- ; M2: bc1t [[BB0:\$BB[0-9_]+]]
- ; M3: bc1t [[BB0:\.LBB[0-9_]+]]
+ ; M2: bc1f [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1f [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
+ ; M2-M3: jr $ra
+ ; M2-M3: mov.d $f0, $f12
+ ; M2-M3: [[BB0]]:
; M2: mov.d $f12, $f14
; M3: mov.d $f12, $f13
- ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.d $f0, $f12
@@ -325,12 +341,14 @@
; M2: c.ueq.d $f12, $f14
; M3: c.ueq.d $f12, $f13
- ; M2: bc1f [[BB0:\$BB[0-9_]+]]
- ; M3: bc1f [[BB0:\.LBB[0-9_]+]]
+ ; M2: bc1t [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1t [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
+ ; M2-M3: jr $ra
+ ; M2-M3: mov.d $f0, $f12
+ ; M2-M3: [[BB0]]:
; M2: mov.d $f12, $f14
; M3: mov.d $f12, $f13
- ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.d $f0, $f12
Index: test/CodeGen/Mips/llvm-ir/select-flt.ll
===================================================================
--- test/CodeGen/Mips/llvm-ir/select-flt.ll
+++ test/CodeGen/Mips/llvm-ir/select-flt.ll
@@ -35,12 +35,13 @@
; M2-M3: andi $[[T0:[0-9]+]], $4, 1
; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]]
- ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]]
+ ; M3: beqz $[[T0]], [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
- ; M2: jr $ra
+ ; M2-M3: jr $ra
+ ; M3: mov.s $f0, $f13
; M2: mtc1 $6, $f0
- ; M3: mov.s $f13, $f14
; M2-M3: [[BB0]]:
+ ; M3: mov.s $f13, $f14
; M2-M3: jr $ra
; M2: mtc1 $5, $f0
; M3: mov.s $f0, $f13
@@ -77,12 +78,14 @@
; ALL-LABEL: tst_select_i1_float_reordered:
; M2-M3: andi $[[T0:[0-9]+]], $6, 1
- ; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]]
- ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]]
+ ; M2: beqz $[[T0]], [[BB0:\$BB[0-9_]+]]
+ ; M3: beqz $[[T0]], [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
+ ; M2-M3: jr $ra
+ ; M2-M3: mov.s $f0, $f12
+ ; M2-M3: [[BB0]]:
; M2: mov.s $f12, $f14
; M3: mov.s $f12, $f13
- ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.s $f0, $f12
@@ -114,12 +117,14 @@
; M2: c.olt.s $f12, $f14
; M3: c.olt.s $f12, $f13
- ; M2: bc1t [[BB0:\$BB[0-9_]+]]
- ; M3: bc1t [[BB0:\.LBB[0-9_]+]]
+ ; M2: bc1f [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1f [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
+ ; M2-M3: jr $ra
+ ; M2-M3: mov.s $f0, $f12
+ ; M2-M3: [[BB0]]:
; M2: mov.s $f12, $f14
; M3: mov.s $f12, $f13
- ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.s $f0, $f12
@@ -152,12 +157,14 @@
; M2: c.ole.s $f12, $f14
; M3: c.ole.s $f12, $f13
- ; M2: bc1t [[BB0:\$BB[0-9_]+]]
- ; M3: bc1t [[BB0:\.LBB[0-9_]+]]
+ ; M2: bc1f [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1f [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
+ ; M2-M3: jr $ra
+ ; M2-M3: mov.s $f0, $f12
+ ; M2-M3: [[BB0]]:
; M2: mov.s $f12, $f14
; M3: mov.s $f12, $f13
- ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.s $f0, $f12
@@ -190,12 +197,14 @@
; M2: c.ule.s $f12, $f14
; M3: c.ule.s $f12, $f13
- ; M2: bc1f [[BB0:\$BB[0-9_]+]]
- ; M3: bc1f [[BB0:\.LBB[0-9_]+]]
+ ; M2: bc1t [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1t [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
+ ; M2-M3: jr $ra
+ ; M2-M3: mov.s $f0, $f12
+ ; M2-M3: [[BB0]]:
; M2: mov.s $f12, $f14
; M3: mov.s $f12, $f13
- ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.s $f0, $f12
@@ -228,12 +237,14 @@
; M2: c.ult.s $f12, $f14
; M3: c.ult.s $f12, $f13
- ; M2: bc1f [[BB0:\$BB[0-9_]+]]
- ; M3: bc1f [[BB0:\.LBB[0-9_]+]]
+ ; M2: bc1t [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1t [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
+ ; M2-M3: jr $ra
+ ; M2-M3: mov.s $f0, $f12
+ ; M2-M3: [[BB0]]:
; M2: mov.s $f12, $f14
; M3: mov.s $f12, $f13
- ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.s $f0, $f12
@@ -266,12 +277,14 @@
; M2: c.eq.s $f12, $f14
; M3: c.eq.s $f12, $f13
- ; M2: bc1t [[BB0:\$BB[0-9_]+]]
- ; M3: bc1t [[BB0:\.LBB[0-9_]+]]
+ ; M2: bc1f [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1f [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
+ ; M2-M3: jr $ra
+ ; M2-M3: mov.s $f0, $f12
+ ; M2-M3: [[BB0]]:
; M2: mov.s $f12, $f14
; M3: mov.s $f12, $f13
- ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.s $f0, $f12
@@ -304,12 +317,14 @@
; M2: c.ueq.s $f12, $f14
; M3: c.ueq.s $f12, $f13
- ; M2: bc1f [[BB0:\$BB[0-9_]+]]
- ; M3: bc1f [[BB0:\.LBB[0-9_]+]]
+ ; M2: bc1t [[BB0:\$BB[0-9_]+]]
+ ; M3: bc1t [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
+ ; M2-M3: jr $ra
+ ; M2-M3: mov.s $f0, $f12
+ ; M2-M3: [[BB0]]:
; M2: mov.s $f12, $f14
; M3: mov.s $f12, $f13
- ; M2-M3: [[BB0]]:
; M2-M3: jr $ra
; M2-M3: mov.s $f0, $f12
Index: test/CodeGen/Mips/llvm-ir/select-int.ll
===================================================================
--- test/CodeGen/Mips/llvm-ir/select-int.ll
+++ test/CodeGen/Mips/llvm-ir/select-int.ll
@@ -35,11 +35,13 @@
; ALL-LABEL: tst_select_i1_i1:
; M2-M3: andi $[[T0:[0-9]+]], $4, 1
- ; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]]
- ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]]
+ ; M2: beqz $[[T0]], [[BB0:\$BB[0-9_]+]]
+ ; M3: beqz $[[T0]], [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
- ; M2-M3: move $5, $6
+ ; M2-M3: jr $ra
+ ; M2-M3: move $2, $5
; M2-M3: [[BB0]]:
+ ; M2-M3: move $5, $6
; M2-M3: jr $ra
; M2-M3: move $2, $5
@@ -71,11 +73,13 @@
; ALL-LABEL: tst_select_i1_i8:
; M2-M3: andi $[[T0:[0-9]+]], $4, 1
- ; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]]
- ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]]
+ ; M2: beqz $[[T0]], [[BB0:\$BB[0-9_]+]]
+ ; M3: beqz $[[T0]], [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
- ; M2-M3: move $5, $6
+ ; M2-M3: jr $ra
+ ; M2-M3: move $2, $5
; M2-M3: [[BB0]]:
+ ; M2-M3: move $5, $6
; M2-M3: jr $ra
; M2-M3: move $2, $5
@@ -107,11 +111,13 @@
; ALL-LABEL: tst_select_i1_i32:
; M2-M3: andi $[[T0:[0-9]+]], $4, 1
- ; M2: bnez $[[T0]], [[BB0:\$BB[0-9_]+]]
- ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]]
+ ; M2: beqz $[[T0]], [[BB0:\$BB[0-9_]+]]
+ ; M3: beqz $[[T0]], [[BB0:\.LBB[0-9_]+]]
; M2-M3: nop
- ; M2-M3: move $5, $6
+ ; M2-M3: jr $ra
+ ; M2-M3: move $2, $5
; M2-M3: [[BB0]]:
+ ; M2-M3: move $5, $6
; M2-M3: jr $ra
; M2-M3: move $2, $5
@@ -173,10 +179,12 @@
; SEL-32: or $3, $[[T6]], $[[T5]]
; M3: andi $[[T0:[0-9]+]], $4, 1
- ; M3: bnez $[[T0]], [[BB0:\.LBB[0-9_]+]]
+ ; M3: beqz $[[T0]], [[BB0:\.LBB[0-9_]+]]
; M3: nop
- ; M3: move $5, $6
+ ; M3: jr $ra
+ ; M3: move $2, $5
; M3: [[BB0]]:
+ ; M3: move $5, $6
; M3: jr $ra
; M3: move $2, $5
@@ -217,20 +225,24 @@
; M2: addiu $[[T0:[0-9]+]], $zero, -1
; M2: xor $[[T1:[0-9]+]], $5, $[[T0]]
; M2: sltu $[[T2:[0-9]+]], $zero, $[[T1]]
- ; M2: bnez $[[T2]], [[BB0:\$BB[0-9_]+]]
+ ; M2: beqz $[[T2]], [[BB0:\$BB[0-9_]+]]
; M2: addiu $2, $zero, 0
- ; M2: move $2, $4
+ ; M2: jr $ra
+ ; M2: nop
; M2: [[BB0]]:
; M2: jr $ra
+ ; M2: move $2, $4
; M3: daddiu $[[T0:[0-9]+]], $zero, -1
; M3: xor $[[T1:[0-9]+]], $5, $[[T0]]
; M3: sltu $[[T2:[0-9]+]], $zero, $[[T1]]
- ; M3: bnez $[[T2]], [[BB0:\.LBB[0-9_]+]]
+ ; M3: beqz $[[T2]], [[BB0:\.LBB[0-9_]+]]
; M3: daddiu $2, $zero, 0
- ; M3: move $2, $4
+ ; M3: jr $ra
+ ; M3: nop
; M3: [[BB0]]:
; M3: jr $ra
+ ; M3: move $2, $4
; CMOV-32: addiu $[[T0:[0-9]+]], $zero, -1
; CMOV-32: xor $[[T1:[0-9]+]], $5, $[[T0]]
Index: test/CodeGen/Mips/llvm-ir/shl.ll
===================================================================
--- test/CodeGen/Mips/llvm-ir/shl.ll
+++ test/CodeGen/Mips/llvm-ir/shl.ll
@@ -105,12 +105,13 @@
; M2: srlv $[[T5:[0-9]+]], $[[T4]], $[[T3]]
; M2: or $2, $[[T2]], $[[T3]]
; M2: $[[BB0]]:
- ; M2: bnez $[[T1]], $[[BB1:BB[0-9_]+]]
+ ; M2: beqz $[[T1]], $[[BB1:BB[0-9_]+]]
; M2: addiu $3, $zero, 0
- ; M2: move $3, $[[T0]]
- ; M2: $[[BB1]]:
; M2: jr $ra
; M2: nop
+ ; M2: $[[BB1]]:
+ ; M2: jr $ra
+ ; M2: move $3, $[[T0]]
; 32R1-R5: sllv $[[T0:[0-9]+]], $4, $7
; 32R1-R5: not $[[T1:[0-9]+]], $7
@@ -184,12 +185,13 @@
; M3: dsrlv $[[T7:[0-9]+]], $[[T5]], $[[T6]]
; M3: or $2, $[[T4]], $[[T7]]
; M3: [[BB0]]:
- ; M3: bnez $[[T3]], [[BB1:\.LBB[0-9_]+]]
+ ; M3: beqz $[[T3]], [[BB1:\.LBB[0-9_]+]]
; M3: daddiu $3, $zero, 0
- ; M3: move $3, $[[T1]]
- ; M3: [[BB1]]:
; M3: jr $ra
; M3: nop
+ ; M3: [[BB1]]:
+ ; M3: jr $ra
+ ; M3: move $3, $[[T1]]
; GP64-NOT-R6: dsllv $[[T0:[0-9]+]], $4, $7
; GP64-NOT-R6: dsrl $[[T1:[0-9]+]], $5, 1
Index: test/CodeGen/Mips/micromips-compact-branches.ll
===================================================================
--- test/CodeGen/Mips/micromips-compact-branches.ll
+++ test/CodeGen/Mips/micromips-compact-branches.ll
@@ -6,7 +6,7 @@
%x = alloca i32, align 4
%0 = load i32, i32* %x, align 4
%cmp = icmp eq i32 %0, 0
- br i1 %cmp, label %if.then, label %if.end
+ br i1 %cmp, label %if.then, label %if.end, !prof !1
if.then:
store i32 10, i32* %x, align 4
@@ -17,3 +17,4 @@
}
; CHECK: bnezc
+!1 = !{!"branch_weights", i32 2, i32 1}
Index: test/CodeGen/Mips/setcc-se.ll
===================================================================
--- test/CodeGen/Mips/setcc-se.ll
+++ test/CodeGen/Mips/setcc-se.ll
@@ -31,7 +31,7 @@
; CHECK: slti $[[R0:[0-9]+]], $4, -32768
; MMR6: slti $[[R0:[0-9]+]], $4, -32768
; MMR6: ,std::allocator >"* %this, %"struct.std::basic_string,std::allocator >"* %__str) {
; CHECK-LABEL: _ZNKSs7compareERKSs:
-; CHECK: cbnz r0,
+; CHECK: cbz r0,
+; CHECK-NEXT: %bb1
+; CHECK-NEXT: pop.w
; CHECK-NEXT: %bb
; CHECK-NEXT: sub{{(.w)?}} r0, r{{[0-9]+}}, r{{[0-9]+}}
-; CHECK-NEXT: %bb1
; CHECK-NEXT: pop.w
entry:
%0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string,std::allocator >"* %this) ; [#uses=3]
Index: test/CodeGen/WebAssembly/implicit-def.ll
===================================================================
--- test/CodeGen/WebAssembly/implicit-def.ll
+++ test/CodeGen/WebAssembly/implicit-def.ll
@@ -6,6 +6,7 @@
; CONST_I32 to provide an explicit push.
; CHECK: br_if 2,
+; CHECK: br_if 1,
; CHECK: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; CHECK-NEXT: return $pop[[L0]]{{$}}
define i1 @f() {
Index: test/CodeGen/WebAssembly/phi.ll
===================================================================
--- test/CodeGen/WebAssembly/phi.ll
+++ test/CodeGen/WebAssembly/phi.ll
@@ -8,8 +8,9 @@
; Basic phi triangle.
; CHECK-LABEL: test0:
-; CHECK: div_s $[[NUM0:[0-9]+]]=, $0, $pop[[NUM1:[0-9]+]]{{$}}
-; CHECK: return $[[NUM0]]{{$}}
+; CHECK: return $0
+; CHECK: div_s $push[[NUM0:[0-9]+]]=, $0, $pop[[NUM1:[0-9]+]]{{$}}
+; CHECK: return $pop[[NUM0]]{{$}}
define i32 @test0(i32 %p) {
entry:
%t = icmp slt i32 %p, 0
Index: test/CodeGen/X86/2008-11-29-ULT-Sign.ll
===================================================================
--- test/CodeGen/X86/2008-11-29-ULT-Sign.ll
+++ test/CodeGen/X86/2008-11-29-ULT-Sign.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep "jns" | count 1
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep "js" | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
define i32 @a(i32 %x) nounwind {
entry:
- %cmp = icmp ult i32 %x, -2147483648 ; [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
+ %cmp = icmp uge i32 %x, -2147483648 ; [#uses=1]
+ br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
%call = call i32 (...) @b() ; [#uses=0]
Index: test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
===================================================================
--- test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
+++ test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
@@ -13,10 +13,11 @@
; CHECK-NEXT: movdqa (%rdi), %xmm0
; CHECK-NEXT: pextrd $2, %xmm0, %eax
; CHECK-NEXT: cmpl $3, %eax
-; CHECK-NEXT: je .LBB0_2
-; CHECK-NEXT: # BB#1:
+; CHECK-NEXT: jne .LBB0_1
+; CHECK-NEXT: # BB#2:
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: pxor %xmm0, %xmm0
-; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: retq
%v = load <4 x i32>, <4 x i32>* %p
%e = extractelement <4 x i32> %v, i32 2
Index: test/CodeGen/X86/2012-08-17-legalizer-crash.ll
===================================================================
--- test/CodeGen/X86/2012-08-17-legalizer-crash.ll
+++ test/CodeGen/X86/2012-08-17-legalizer-crash.ll
@@ -26,5 +26,5 @@
ret void
; CHECK-LABEL: fn1:
-; CHECK: jb
+; CHECK: jae
}
Index: test/CodeGen/X86/add.ll
===================================================================
--- test/CodeGen/X86/add.ll
+++ test/CodeGen/X86/add.ll
@@ -30,7 +30,8 @@
%t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
%sum = extractvalue {i32, i1} %t, 0
%obit = extractvalue {i32, i1} %t, 1
- br i1 %obit, label %overflow, label %normal
+ %notobit = xor i1 1, %obit
+ br i1 %notobit, label %normal, label %overflow
normal:
store i32 0, i32* %X
@@ -41,11 +42,11 @@
; X32-LABEL: test4:
; X32: addl
-; X32-NEXT: jo
+; X32-NEXT: jno
; X64-LABEL: test4:
; X64: addl %e[[A1:si|dx]], %e[[A0:di|cx]]
-; X64-NEXT: jo
+; X64-NEXT: jno
}
define i1 @test5(i32 %v1, i32 %v2, i32* %X) nounwind {
@@ -53,7 +54,8 @@
%t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
%sum = extractvalue {i32, i1} %t, 0
%obit = extractvalue {i32, i1} %t, 1
- br i1 %obit, label %carry, label %normal
+ %notobit = xor i1 1, %obit
+ br i1 %notobit, label %normal, label %carry
normal:
store i32 0, i32* %X
@@ -64,11 +66,11 @@
; X32-LABEL: test5:
; X32: addl
-; X32-NEXT: jb
+; X32-NEXT: jae
; X64-LABEL: test5:
; X64: addl %e[[A1]], %e[[A0]]
-; X64-NEXT: jb
+; X64-NEXT: jae
}
declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
Index: test/CodeGen/X86/avx-select.ll
===================================================================
--- test/CodeGen/X86/avx-select.ll
+++ test/CodeGen/X86/avx-select.ll
@@ -6,10 +6,12 @@
; CHECK: ## BB#0:
; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
; CHECK-NEXT: cmpl $255, %edi
-; CHECK-NEXT: je LBB0_2
-; CHECK-NEXT: ## BB#1:
+; CHECK-NEXT: jne LBB0_1
+; CHECK-NEXT: ## BB#2:
+; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+; CHECK-NEXT: LBB0_1:
; CHECK-NEXT: vmovaps %ymm0, %ymm1
-; CHECK-NEXT: LBB0_2:
; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%cmpres = icmp eq i32 %a, 255
@@ -23,10 +25,12 @@
; CHECK: ## BB#0:
; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
; CHECK-NEXT: cmpl $255, %edi
-; CHECK-NEXT: je LBB1_2
-; CHECK-NEXT: ## BB#1:
+; CHECK-NEXT: jne LBB1_1
+; CHECK-NEXT: ## BB#2:
+; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+; CHECK-NEXT: LBB1_1:
; CHECK-NEXT: vmovaps %ymm0, %ymm1
-; CHECK-NEXT: LBB1_2:
; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%cmpres = icmp eq i32 %a, 255
Index: test/CodeGen/X86/avx-splat.ll
===================================================================
--- test/CodeGen/X86/avx-splat.ll
+++ test/CodeGen/X86/avx-splat.ll
@@ -62,8 +62,10 @@
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ## implicit-def: %YMM0
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: jne LBB4_2
-; CHECK-NEXT: ## BB#1: ## %load.i1247
+; CHECK-NEXT: je LBB4_1
+; CHECK-NEXT: ## BB#2: ## %__load_and_broadcast_32.exit1249
+; CHECK-NEXT: retq
+; CHECK-NEXT: LBB4_1: ## %load.i1247
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: andq $-32, %rsp
@@ -71,7 +73,6 @@
; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
-; CHECK-NEXT: LBB4_2: ## %__load_and_broadcast_32.exit1249
; CHECK-NEXT: retq
allocas:
%udx495 = alloca [18 x [18 x float]], align 32
Index: test/CodeGen/X86/avx512-calling-conv.ll
===================================================================
--- test/CodeGen/X86/avx512-calling-conv.ll
+++ test/CodeGen/X86/avx512-calling-conv.ll
@@ -329,19 +329,21 @@
; ALL_X64-LABEL: test8:
; ALL_X64: ## BB#0:
; ALL_X64-NEXT: testb $1, %dil
-; ALL_X64-NEXT: jne LBB8_2
-; ALL_X64-NEXT: ## BB#1:
+; ALL_X64-NEXT: je LBB8_1
+; ALL_X64-NEXT: ## BB#2:
+; ALL_X64-NEXT: retq
+; ALL_X64-NEXT: LBB8_1:
; ALL_X64-NEXT: vmovaps %xmm1, %xmm0
-; ALL_X64-NEXT: LBB8_2:
; ALL_X64-NEXT: retq
;
; KNL_X32-LABEL: test8:
; KNL_X32: ## BB#0:
; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp)
-; KNL_X32-NEXT: jne LBB8_2
-; KNL_X32-NEXT: ## BB#1:
+; KNL_X32-NEXT: je LBB8_1
+; KNL_X32-NEXT: ## BB#2:
+; KNL_X32-NEXT: retl
+; KNL_X32-NEXT: LBB8_1:
; KNL_X32-NEXT: vmovaps %xmm1, %xmm0
-; KNL_X32-NEXT: LBB8_2:
; KNL_X32-NEXT: retl
%res = select i1 %cond, <16 x i8> %a1, <16 x i8> %a2
ret <16 x i8> %res
Index: test/CodeGen/X86/avx512-cmp.ll
===================================================================
--- test/CodeGen/X86/avx512-cmp.ll
+++ test/CodeGen/X86/avx512-cmp.ll
@@ -69,13 +69,14 @@
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT: vucomiss %xmm1, %xmm0
; ALL-NEXT: jne LBB3_1
-; ALL-NEXT: jnp LBB3_2
+; ALL-NEXT: jp LBB3_1
+; ALL-NEXT: ## BB#2: ## %return
+; ALL-NEXT: retq
; ALL-NEXT: LBB3_1: ## %if.end
; ALL-NEXT: seta %al
; ALL-NEXT: movzbl %al, %eax
; ALL-NEXT: leaq {{.*}}(%rip), %rcx
; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; ALL-NEXT: LBB3_2: ## %return
; ALL-NEXT: retq
entry:
%cmp = fcmp oeq float %p, 0.000000e+00
Index: test/CodeGen/X86/avx512-mask-op.ll
===================================================================
--- test/CodeGen/X86/avx512-mask-op.ll
+++ test/CodeGen/X86/avx512-mask-op.ll
@@ -411,10 +411,11 @@
; KNL-LABEL: test11:
; KNL: ## BB#0:
; KNL-NEXT: cmpl %esi, %edi
-; KNL-NEXT: jg LBB20_2
-; KNL-NEXT: ## BB#1:
+; KNL-NEXT: jle LBB20_1
+; KNL-NEXT: ## BB#2:
+; KNL-NEXT: retq
+; KNL-NEXT: LBB20_1:
; KNL-NEXT: vmovaps %xmm1, %xmm0
-; KNL-NEXT: LBB20_2:
; KNL-NEXT: retq
;
; SKX-LABEL: test11:
Index: test/CodeGen/X86/avx512-select.ll
===================================================================
--- test/CodeGen/X86/avx512-select.ll
+++ test/CodeGen/X86/avx512-select.ll
@@ -6,10 +6,12 @@
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1
; CHECK-NEXT: cmpl $255, %edi
-; CHECK-NEXT: je LBB0_2
-; CHECK-NEXT: ## BB#1:
+; CHECK-NEXT: jne LBB0_1
+; CHECK-NEXT: ## BB#2:
+; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
+; CHECK-NEXT: LBB0_1:
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1
-; CHECK-NEXT: LBB0_2:
; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%cmpres = icmp eq i32 %a, 255
@@ -23,10 +25,12 @@
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1
; CHECK-NEXT: cmpl $255, %edi
-; CHECK-NEXT: je LBB1_2
-; CHECK-NEXT: ## BB#1:
+; CHECK-NEXT: jne LBB1_1
+; CHECK-NEXT: ## BB#2:
+; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
+; CHECK-NEXT: LBB1_1:
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1
-; CHECK-NEXT: LBB1_2:
; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%cmpres = icmp eq i32 %a, 255
Index: test/CodeGen/X86/bt.ll
===================================================================
--- test/CodeGen/X86/bt.ll
+++ test/CodeGen/X86/bt.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s
@@ -23,8 +24,14 @@
; CHECK-LABEL: test2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jb .LBB0_2
-;
+; CHECK-NEXT: jae .LBB0_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB0_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@@ -44,12 +51,17 @@
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB1_2
-;
+; CHECK-NEXT: # BB#1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .LBB1_2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 1, %tmp29
%tmp4 = icmp eq i32 %tmp3, 0
- br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+ br i1 %tmp4, label %bb, label %UnifiedReturnBlock, !prof !1
bb:
call void @foo()
@@ -63,8 +75,14 @@
; CHECK-LABEL: atest2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jb .LBB2_2
-;
+; CHECK-NEXT: jae .LBB2_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB2_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@@ -84,12 +102,17 @@
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB3_2
-;
+; CHECK-NEXT: # BB#1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .LBB3_2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 1, %tmp29
%tmp4 = icmp eq i32 %tmp3, 0
- br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+ br i1 %tmp4, label %bb, label %UnifiedReturnBlock, !prof !1
bb:
call void @foo()
@@ -104,12 +127,17 @@
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB4_2
-;
+; CHECK-NEXT: # BB#1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .LBB4_2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %tmp29, %x
%tmp4 = icmp eq i32 %tmp3, 0
- br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+ br i1 %tmp4, label %bb, label %UnifiedReturnBlock, !prof !1
bb:
call void @foo()
@@ -124,12 +152,17 @@
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB5_2
-;
+; CHECK-NEXT: # BB#1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .LBB5_2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %x, %tmp29
%tmp4 = icmp eq i32 %tmp3, 0
- br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+ br i1 %tmp4, label %bb, label %UnifiedReturnBlock, !prof !1
bb:
call void @foo()
@@ -143,8 +176,14 @@
; CHECK-LABEL: testne2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jae .LBB6_2
-;
+; CHECK-NEXT: jb .LBB6_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB6_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@@ -164,7 +203,12 @@
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB7_2
-;
+; CHECK-NEXT: # BB#1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .LBB7_2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@@ -183,8 +227,14 @@
; CHECK-LABEL: atestne2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jae .LBB8_2
-;
+; CHECK-NEXT: jb .LBB8_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB8_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@@ -204,7 +254,12 @@
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB9_2
-;
+; CHECK-NEXT: # BB#1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .LBB9_2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@@ -224,7 +279,12 @@
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB10_2
-;
+; CHECK-NEXT: # BB#1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .LBB10_2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %tmp29, %x
@@ -244,7 +304,12 @@
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB11_2
-;
+; CHECK-NEXT: # BB#1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .LBB11_2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %x, %tmp29
@@ -263,8 +328,14 @@
; CHECK-LABEL: query2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jae .LBB12_2
-;
+; CHECK-NEXT: jb .LBB12_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB12_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@@ -283,8 +354,14 @@
; CHECK-LABEL: query2b:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jae .LBB13_2
-;
+; CHECK-NEXT: jb .LBB13_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB13_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@@ -303,8 +380,14 @@
; CHECK-LABEL: aquery2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jae .LBB14_2
-;
+; CHECK-NEXT: jb .LBB14_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB14_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@@ -323,8 +406,14 @@
; CHECK-LABEL: aquery2b:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jae .LBB15_2
-;
+; CHECK-NEXT: jb .LBB15_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB15_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@@ -343,8 +432,14 @@
; CHECK-LABEL: query3:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jae .LBB16_2
-;
+; CHECK-NEXT: jb .LBB16_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB16_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %tmp29, %x
@@ -363,8 +458,14 @@
; CHECK-LABEL: query3b:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jae .LBB17_2
-;
+; CHECK-NEXT: jb .LBB17_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB17_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %x, %tmp29
@@ -383,8 +484,14 @@
; CHECK-LABEL: query3x:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jae .LBB18_2
-;
+; CHECK-NEXT: jb .LBB18_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB18_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %tmp29, %x
@@ -403,8 +510,14 @@
; CHECK-LABEL: query3bx:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jae .LBB19_2
-;
+; CHECK-NEXT: jb .LBB19_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB19_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %x, %tmp29
@@ -423,8 +536,14 @@
; CHECK-LABEL: queryne2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jb .LBB20_2
-;
+; CHECK-NEXT: jae .LBB20_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB20_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@@ -443,8 +562,14 @@
; CHECK-LABEL: queryne2b:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jb .LBB21_2
-;
+; CHECK-NEXT: jae .LBB21_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB21_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@@ -463,8 +588,14 @@
; CHECK-LABEL: aqueryne2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jb .LBB22_2
-;
+; CHECK-NEXT: jae .LBB22_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB22_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@@ -483,8 +614,14 @@
; CHECK-LABEL: aqueryne2b:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jb .LBB23_2
-;
+; CHECK-NEXT: jae .LBB23_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB23_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@@ -503,8 +640,14 @@
; CHECK-LABEL: queryne3:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jb .LBB24_2
-;
+; CHECK-NEXT: jae .LBB24_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB24_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %tmp29, %x
@@ -523,8 +666,14 @@
; CHECK-LABEL: queryne3b:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jb .LBB25_2
-;
+; CHECK-NEXT: jae .LBB25_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB25_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %x, %tmp29
@@ -543,8 +692,14 @@
; CHECK-LABEL: queryne3x:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jb .LBB26_2
-;
+; CHECK-NEXT: jae .LBB26_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB26_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %tmp29, %x
@@ -563,8 +718,14 @@
; CHECK-LABEL: queryne3bx:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
-; CHECK-NEXT: jb .LBB27_2
-;
+; CHECK-NEXT: jae .LBB27_1
+; CHECK-NEXT: # BB#2: # %UnifiedReturnBlock
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB27_1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %x, %tmp29
@@ -588,7 +749,6 @@
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: setb %al
; CHECK-NEXT: retq
-;
%neg = xor i32 %flags, -1
%shl = shl i32 1, %flag
%and = and i32 %shl, %neg
@@ -598,8 +758,10 @@
define zeroext i1 @extend(i32 %bit, i64 %bits) {
; CHECK-LABEL: extend:
-; CHECK: # BB#0:
-; CHECK-NEXT: btl %edi, %esi
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: btl %edi, %esi
+; CHECK-NEXT: setb %al
+; CHECK-NEXT: retq
entry:
%and = and i32 %bit, 31
%sh_prom = zext i32 %and to i64
@@ -608,3 +770,5 @@
%tobool = icmp ne i64 %and1, 0
ret i1 %tobool
}
+
+!1 = !{!"branch_weights", i32 2, i32 1}
Index: test/CodeGen/X86/cmov.ll
===================================================================
--- test/CodeGen/X86/cmov.ll
+++ test/CodeGen/X86/cmov.ll
@@ -152,7 +152,7 @@
define i8 @test7(i1 inreg %c, i8 inreg %a, i8 inreg %b) nounwind {
; CHECK-LABEL: test7:
; CHECK: testb $1, %dil
-; CHECK-NEXT: jne LBB
+; CHECK-NEXT: je LBB
%d = select i1 %c, i8 %a, i8 %b
ret i8 %d
Index: test/CodeGen/X86/cmovcmov.ll
===================================================================
--- test/CodeGen/X86/cmovcmov.ll
+++ test/CodeGen/X86/cmovcmov.ll
@@ -22,11 +22,14 @@
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 16(%esp), %eax
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
-; NOCMOV-NEXT: jp [[TBB]]
-; NOCMOV-NEXT: leal 12(%esp), %eax
+; NOCMOV-NEXT: jnp [[EXIT:.LBB[0-9_]+]]
; NOCMOV-NEXT:[[TBB]]:
; NOCMOV-NEXT: movl (%eax), %eax
; NOCMOV-NEXT: retl
+; NOCMOV-NEXT:[[EXIT]]:
+; NOCMOV-NEXT: leal 12(%esp), %eax
+; NOCMOV-NEXT: movl (%eax), %eax
+; NOCMOV-NEXT: retl
define i32 @test_select_fcmp_oeq_i32(float %a, float %b, i32 %c, i32 %d) #0 {
entry:
%cmp = fcmp oeq float %a, %b
@@ -96,11 +99,14 @@
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
-; CMOV-NEXT: jp [[TBB]]
-; CMOV-NEXT: movaps %xmm2, %xmm3
+; CMOV-NEXT: jnp [[EXIT:.LBB[0-9_]+]]
; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: movaps %xmm3, %xmm0
; CMOV-NEXT: retq
+; CMOV-NEXT: [[EXIT]]:
+; CMOV-NEXT: movaps %xmm2, %xmm3
+; CMOV-NEXT: movaps %xmm3, %xmm0
+; CMOV-NEXT: retq
; NOCMOV-NEXT: flds 8(%esp)
; NOCMOV-NEXT: flds 4(%esp)
@@ -109,11 +115,14 @@
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 20(%esp), %eax
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
-; NOCMOV-NEXT: jp [[TBB]]
-; NOCMOV-NEXT: leal 12(%esp), %eax
+; NOCMOV-NEXT: jnp [[EXIT:.LBB[0-9_]+]]
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: fldl (%eax)
; NOCMOV-NEXT: retl
+; NOCMOV-NEXT: [[EXIT]]:
+; NOCMOV-NEXT: leal 12(%esp), %eax
+; NOCMOV-NEXT: fldl (%eax)
+; NOCMOV-NEXT: retl
define double @test_select_fcmp_oeq_f64(float %a, float %b, double %c, double %d) #0 {
entry:
%cmp = fcmp oeq float %a, %b
@@ -125,11 +134,14 @@
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
-; CMOV-NEXT: jp [[TBB]]
-; CMOV-NEXT: movaps %xmm2, %xmm3
+; CMOV-NEXT: jnp [[EXIT:.LBB[0-9_]+]]
; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: movaps %xmm3, %xmm0
; CMOV-NEXT: retq
+; CMOV-NEXT: [[EXIT]]:
+; CMOV-NEXT: movaps %xmm2, %xmm3
+; CMOV-NEXT: movaps %xmm3, %xmm0
+; CMOV-NEXT: retq
; NOCMOV-NEXT: pushl %edi
; NOCMOV-NEXT: pushl %esi
@@ -186,13 +198,15 @@
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
-; CMOV-NEXT: jp [[TBB]]
-; CMOV-NEXT: xorps %xmm0, %xmm0
+; CMOV-NEXT: jnp [[EXIT:.LBB[0-9_]+]]
; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: retq
+; CMOV-NEXT: [[EXIT]]:
+; CMOV-NEXT: xorps %xmm0, %xmm0
+; CMOV-NEXT: retq
; NOCMOV: jne
-; NOCMOV-NEXT: jp
+; NOCMOV-NEXT: jnp
define float @test_zext_fcmp_une(float %a, float %b) #0 {
entry:
%cmp = fcmp une float %a, %b
@@ -208,13 +222,15 @@
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: xorps %xmm0, %xmm0
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
-; CMOV-NEXT: jp [[TBB]]
-; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
+; CMOV-NEXT: jnp [[EXIT:.LBB[0-9_]+]]
; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: retq
+; CMOV-NEXT: [[EXIT]]:
+; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
+; CMOV-NEXT: retq
; NOCMOV: jne
-; NOCMOV-NEXT: jp
+; NOCMOV-NEXT: jnp
define float @test_zext_fcmp_oeq(float %a, float %b) #0 {
entry:
%cmp = fcmp oeq float %a, %b
@@ -256,9 +272,11 @@
; CMOV: movl %edx, %eax
; CMOV: [[BB1]]:
; CMOV: testl %edi, %edi
-; CMOV: je [[BB2:.LBB[0-9_]+]]
-; CMOV: movl %edx, %eax
+; CMOV: jne [[BB2:.LBB[0-9_]+]]
+; CMOV: movb %al, g8(%rip)
+; CMOV: retq
; CMOV: [[BB2]]:
+; CMOV: movl %edx, %eax
; CMOV: movb %al, g8(%rip)
; CMOV: retq
define void @no_cascade_opt(i32 %v0, i32 %v1, i32 %v2, i32 %v3) {
Index: test/CodeGen/X86/critical-edge-split-2.ll
===================================================================
--- test/CodeGen/X86/critical-edge-split-2.ll
+++ test/CodeGen/X86/critical-edge-split-2.ll
@@ -24,6 +24,7 @@
; CHECK-LABEL: test1:
; CHECK: testb %dil, %dil
-; CHECK: jne LBB0_2
+; CHECK: je LBB0_1
+; CHECK: retq
+; CHECK: LBB0_1:
; CHECK: divl
-; CHECK: LBB0_2:
Index: test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll
===================================================================
--- test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll
+++ test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll
@@ -7,9 +7,10 @@
define float @select_fcmp_one_f32(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: select_fcmp_one_f32
; CHECK: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: jne [[BB:LBB[0-9]+_2]]
-; CHECK: [[BB]]
+; CHECK-NEXT: je [[BB:LBB[0-9]+_1]]
+; CHECK-NEXT: ## BB#2:
; CHECK-NEXT: movaps %xmm2, %xmm0
+; CHECK: [[BB]]
%1 = fcmp one float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -18,9 +19,10 @@
define double @select_fcmp_one_f64(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: select_fcmp_one_f64
; CHECK: ucomisd %xmm1, %xmm0
-; CHECK-NEXT: jne [[BB:LBB[0-9]+_2]]
-; CHECK: [[BB]]
+; CHECK-NEXT: je [[BB:LBB[0-9]+_1]]
+; CHECK-NEXT: ## BB#2:
; CHECK-NEXT: movaps %xmm2, %xmm0
+; CHECK: [[BB]]
%1 = fcmp one double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
@@ -29,9 +31,10 @@
define float @select_icmp_eq_f32(i64 %a, i64 %b, float %c, float %d) {
; CHECK-LABEL: select_icmp_eq_f32
; CHECK: cmpq %rsi, %rdi
-; CHECK-NEXT: je [[BB:LBB[0-9]+_2]]
-; CHECK: [[BB]]
+; CHECK-NEXT: jne [[BB:LBB[0-9]+_1]]
+; CHECK-NEXT: ## BB#2:
; CHECK-NEXT: retq
+; CHECK: [[BB]]
%1 = icmp eq i64 %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -40,9 +43,10 @@
define float @select_icmp_ne_f32(i64 %a, i64 %b, float %c, float %d) {
; CHECK-LABEL: select_icmp_ne_f32
; CHECK: cmpq %rsi, %rdi
-; CHECK-NEXT: jne [[BB:LBB[0-9]+_2]]
-; CHECK: [[BB]]
+; CHECK-NEXT: je [[BB:LBB[0-9]+_1]]
+; CHECK-NEXT: ## BB#2:
; CHECK-NEXT: retq
+; CHECK: [[BB]]
%1 = icmp ne i64 %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -51,9 +55,10 @@
define float @select_icmp_ugt_f32(i64 %a, i64 %b, float %c, float %d) {
; CHECK-LABEL: select_icmp_ugt_f32
; CHECK: cmpq %rsi, %rdi
-; CHECK-NEXT: ja [[BB:LBB[0-9]+_2]]
-; CHECK: [[BB]]
+; CHECK-NEXT: jbe [[BB:LBB[0-9]+_1]]
+; CHECK-NEXT: ## BB#2:
; CHECK-NEXT: retq
+; CHECK: [[BB]]
%1 = icmp ugt i64 %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -62,9 +67,10 @@
define float @select_icmp_uge_f32(i64 %a, i64 %b, float %c, float %d) {
; CHECK-LABEL: select_icmp_uge_f32
; CHECK: cmpq %rsi, %rdi
-; CHECK-NEXT: jae [[BB:LBB[0-9]+_2]]
-; CHECK: [[BB]]
+; CHECK-NEXT: jb [[BB:LBB[0-9]+_1]]
+; CHECK-NEXT: ## BB#2:
; CHECK-NEXT: retq
+; CHECK: [[BB]]
%1 = icmp uge i64 %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -73,9 +79,10 @@
define float @select_icmp_ult_f32(i64 %a, i64 %b, float %c, float %d) {
; CHECK-LABEL: select_icmp_ult_f32
; CHECK: cmpq %rsi, %rdi
-; CHECK-NEXT: jb [[BB:LBB[0-9]+_2]]
-; CHECK: [[BB]]
+; CHECK-NEXT: jae [[BB:LBB[0-9]+_1]]
+; CHECK-NEXT: ## BB#2:
; CHECK-NEXT: retq
+; CHECK: [[BB]]
%1 = icmp ult i64 %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -84,9 +91,10 @@
define float @select_icmp_ule_f32(i64 %a, i64 %b, float %c, float %d) {
; CHECK-LABEL: select_icmp_ule_f32
; CHECK: cmpq %rsi, %rdi
-; CHECK-NEXT: jbe [[BB:LBB[0-9]+_2]]
-; CHECK: [[BB]]
+; CHECK-NEXT: ja [[BB:LBB[0-9]+_1]]
+; CHECK-NEXT: ## BB#2:
; CHECK-NEXT: retq
+; CHECK: [[BB]]
%1 = icmp ule i64 %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -95,9 +103,10 @@
define float @select_icmp_sgt_f32(i64 %a, i64 %b, float %c, float %d) {
; CHECK-LABEL: select_icmp_sgt_f32
; CHECK: cmpq %rsi, %rdi
-; CHECK-NEXT: jg [[BB:LBB[0-9]+_2]]
-; CHECK: [[BB]]
+; CHECK-NEXT: jle [[BB:LBB[0-9]+_1]]
+; CHECK-NEXT: ## BB#2:
; CHECK-NEXT: retq
+; CHECK: [[BB]]
%1 = icmp sgt i64 %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -106,9 +115,10 @@
define float @select_icmp_sge_f32(i64 %a, i64 %b, float %c, float %d) {
; CHECK-LABEL: select_icmp_sge_f32
; CHECK: cmpq %rsi, %rdi
-; CHECK-NEXT: jge [[BB:LBB[0-9]+_2]]
-; CHECK: [[BB]]
+; CHECK-NEXT: jl [[BB:LBB[0-9]+_1]]
+; CHECK-NEXT: ## BB#2:
; CHECK-NEXT: retq
+; CHECK: [[BB]]
%1 = icmp sge i64 %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -117,9 +127,10 @@
define float @select_icmp_slt_f32(i64 %a, i64 %b, float %c, float %d) {
; CHECK-LABEL: select_icmp_slt_f32
; CHECK: cmpq %rsi, %rdi
-; CHECK-NEXT: jl [[BB:LBB[0-9]+_2]]
-; CHECK: [[BB]]
+; CHECK-NEXT: jge [[BB:LBB[0-9]+_1]]
+; CHECK-NEXT: ## BB#2:
; CHECK-NEXT: retq
+; CHECK: [[BB]]
%1 = icmp slt i64 %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -128,9 +139,10 @@
define float @select_icmp_sle_f32(i64 %a, i64 %b, float %c, float %d) {
; CHECK-LABEL: select_icmp_sle_f32
; CHECK: cmpq %rsi, %rdi
-; CHECK-NEXT: jle [[BB:LBB[0-9]+_2]]
-; CHECK: [[BB]]
+; CHECK-NEXT: jg [[BB:LBB[0-9]+_1]]
+; CHECK-NEXT: ## BB#2:
; CHECK-NEXT: retq
+; CHECK: [[BB]]
%1 = icmp sle i64 %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
Index: test/CodeGen/X86/fp-une-cmp.ll
===================================================================
--- test/CodeGen/X86/fp-une-cmp.ll
+++ test/CodeGen/X86/fp-une-cmp.ll
@@ -36,8 +36,8 @@
entry:
%mul = fmul double %x, %y
- %cmp = fcmp une double %mul, 0.000000e+00
- br i1 %cmp, label %bb2, label %bb1
+ %cmp = fcmp oeq double %mul, 0.000000e+00
+ br i1 %cmp, label %bb1, label %bb2
bb1:
%add = fadd double %mul, -1.000000e+00
Index: test/CodeGen/X86/fp128-i128.ll
===================================================================
--- test/CodeGen/X86/fp128-i128.ll
+++ test/CodeGen/X86/fp128-i128.ll
@@ -132,7 +132,7 @@
; CHECK-LABEL: TestI128_2:
; CHECK: movaps %xmm0, -24(%rsp)
; CHECK-NEXT: cmpq $0, -16(%rsp)
-; CHECK-NEXT: jns
+; CHECK-NEXT: js
; CHECK: movaps %xmm1, %xmm0
; CHECK: retq
}
@@ -259,7 +259,7 @@
; CHECK-LABEL: TestComp:
; CHECK: movaps %xmm0, -24(%rsp)
; CHECK-NEXT: cmpq $0, -16(%rsp)
-; CHECK-NEXT: jns
+; CHECK-NEXT: js
; CHECK: movaps %xmm1, %xmm0
; CHECK: retq
}
Index: test/CodeGen/X86/jump_sign.ll
===================================================================
--- test/CodeGen/X86/jump_sign.ll
+++ test/CodeGen/X86/jump_sign.ll
@@ -6,7 +6,7 @@
; CHECK: jns
%tmp1 = add i32 %X, 1 ; [#uses=1]
%tmp = icmp slt i32 %tmp1, 0 ; [#uses=1]
- br i1 %tmp, label %cond_true, label %cond_next
+ br i1 %tmp, label %cond_true, label %cond_next, !prof !1
cond_true: ; preds = %entry
%tmp2 = tail call i32 (...) @bar( ) ; [#uses=0]
@@ -303,3 +303,5 @@
if.end:
ret i32 undef
}
+
+!1 = !{!"branch_weights", i32 2, i32 1}
Index: test/CodeGen/X86/machine-cse.ll
===================================================================
--- test/CodeGen/X86/machine-cse.ll
+++ test/CodeGen/X86/machine-cse.ll
@@ -85,9 +85,9 @@
entry:
; CHECK-LABEL: cross_mbb_phys_cse:
; CHECK: cmpl
-; CHECK: ja
- %cmp = icmp ugt i32 %a, %b
- br i1 %cmp, label %return, label %if.end
+; CHECK: jbe
+ %cmp = icmp ule i32 %a, %b
+ br i1 %cmp, label %if.end, label %return
if.end: ; preds = %entry
; CHECK-NOT: cmpl
Index: test/CodeGen/X86/pr5145.ll
===================================================================
--- test/CodeGen/X86/pr5145.ll
+++ test/CodeGen/X86/pr5145.ll
@@ -1,31 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=x86-64 < %s | FileCheck %s
@sc8 = external global i8
define void @atomic_maxmin_i8() {
-; CHECK: atomic_maxmin_i8
+; CHECK-LABEL: atomic_maxmin_i8:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb {{.*}}(%rip), %al
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cmpb $4, %al
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: jle .LBB0_2
+; CHECK-NEXT: # BB#3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)
+; CHECK-NEXT: jne .LBB0_1
+; CHECK-NEXT: jmp .LBB0_4
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_2: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: movb $5, %cl
+; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)
+; CHECK-NEXT: jne .LBB0_1
+; CHECK-NEXT: .LBB0_4: # %atomicrmw.end
+; CHECK-NEXT: movb {{.*}}(%rip), %al
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_5: # %atomicrmw.start2
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cmpb $7, %al
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: jge .LBB0_6
+; CHECK-NEXT: # BB#7: # %atomicrmw.start2
+; CHECK-NEXT: # in Loop: Header=BB0_5 Depth=1
+; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)
+; CHECK-NEXT: jne .LBB0_5
+; CHECK-NEXT: jmp .LBB0_8
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_6: # %atomicrmw.start2
+; CHECK-NEXT: # in Loop: Header=BB0_5 Depth=1
+; CHECK-NEXT: movb $6, %cl
+; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)
+; CHECK-NEXT: jne .LBB0_5
+; CHECK-NEXT: .LBB0_8: # %atomicrmw.end1
+; CHECK-NEXT: movb {{.*}}(%rip), %al
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_9: # %atomicrmw.start8
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cmpb $7, %al
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: jbe .LBB0_10
+; CHECK-NEXT: # BB#11: # %atomicrmw.start8
+; CHECK-NEXT: # in Loop: Header=BB0_9 Depth=1
+; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)
+; CHECK-NEXT: jne .LBB0_9
+; CHECK-NEXT: jmp .LBB0_12
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_10: # %atomicrmw.start8
+; CHECK-NEXT: # in Loop: Header=BB0_9 Depth=1
+; CHECK-NEXT: movb $7, %cl
+; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)
+; CHECK-NEXT: jne .LBB0_9
+; CHECK-NEXT: .LBB0_12: # %atomicrmw.end7
+; CHECK-NEXT: movb {{.*}}(%rip), %al
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_13: # %atomicrmw.start14
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cmpb $9, %al
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: jae .LBB0_14
+; CHECK-NEXT: # BB#15: # %atomicrmw.start14
+; CHECK-NEXT: # in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)
+; CHECK-NEXT: jne .LBB0_13
+; CHECK-NEXT: jmp .LBB0_16
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_14: # %atomicrmw.start14
+; CHECK-NEXT: # in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT: movb $8, %cl
+; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)
+; CHECK-NEXT: jne .LBB0_13
+; CHECK-NEXT: .LBB0_16: # %atomicrmw.end13
+; CHECK-NEXT: retq
%1 = atomicrmw max i8* @sc8, i8 5 acquire
-; CHECK: [[LABEL1:\.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpb
-; CHECK: jg
-; CHECK: lock cmpxchgb
-; CHECK: jne [[LABEL1]]
%2 = atomicrmw min i8* @sc8, i8 6 acquire
-; CHECK: [[LABEL3:\.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpb
-; CHECK: jl
-; CHECK: lock cmpxchgb
-; CHECK: jne [[LABEL3]]
%3 = atomicrmw umax i8* @sc8, i8 7 acquire
-; CHECK: [[LABEL5:\.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpb
-; CHECK: ja
-; CHECK: lock cmpxchgb
-; CHECK: jne [[LABEL5]]
%4 = atomicrmw umin i8* @sc8, i8 8 acquire
-; CHECK: [[LABEL7:\.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpb
-; CHECK: jb
-; CHECK: lock cmpxchgb
-; CHECK: jne [[LABEL7]]
ret void
}
Index: test/CodeGen/X86/pseudo_cmov_lower2.ll
===================================================================
--- test/CodeGen/X86/pseudo_cmov_lower2.ll
+++ test/CodeGen/X86/pseudo_cmov_lower2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -o - | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -o - | FileCheck %s
; This test checks that only a single jae gets generated in the final code
; for lowering the CMOV pseudos that get created for this IR. The tricky part
@@ -49,12 +49,14 @@
; the operands of the resulting instructions are from the proper places.
;
; CHECK-LABEL: foo3:
-; CHECK: js
-; CHECK-NOT: js
-; CHECK-LABEL: # BB#1:
+; CHECK: jns
+; CHECK-NOT: j{{n?}}s
+; CHECK-LABEL: # BB#2:
+; CHECK: divsd %xmm1, %xmm0
+; CHECK: ret
+; CHECK-LABEL:.LBB2_1:
; CHECK-DAG: movapd %xmm2, %xmm1
; CHECK-DAG: movapd %xmm2, %xmm0
-; CHECK-LABEL:.LBB2_2:
; CHECK: divsd %xmm1, %xmm0
; CHECK: ret
define double @foo3(i32 %p1, double %p2, double %p3,
@@ -79,12 +81,14 @@
; of the selects to give the same actual computation.
;
; CHECK-LABEL: foo4:
-; CHECK: js
-; CHECK-NOT: js
-; CHECK-LABEL: # BB#1:
+; CHECK: jns
+; CHECK-NOT: j{{n?}}s
+; CHECK-LABEL: # BB#2:
+; CHECK: divsd %xmm1, %xmm0
+; CHECK: ret
+; CHECK-LABEL:.LBB3_1:
; CHECK-DAG: movapd %xmm2, %xmm1
; CHECK-DAG: movapd %xmm2, %xmm0
-; CHECK-LABEL:.LBB3_2:
; CHECK: divsd %xmm1, %xmm0
; CHECK: ret
define double @foo4(i32 %p1, double %p2, double %p3,
Index: test/CodeGen/X86/select.ll
===================================================================
--- test/CodeGen/X86/select.ll
+++ test/CodeGen/X86/select.ll
@@ -524,10 +524,12 @@
; GENERIC-NEXT: cmovlel %edi, %eax
; GENERIC-NEXT: cmpl $-128, %eax
; GENERIC-NEXT: movb $-128, %cl
-; GENERIC-NEXT: jl LBB22_2
-; GENERIC-NEXT: ## BB#1:
+; GENERIC-NEXT: jge LBB22_1
+; GENERIC-NEXT: ## BB#2:
+; GENERIC-NEXT: movb %cl, (%rsi)
+; GENERIC-NEXT: retq
+; GENERIC-NEXT: LBB22_1:
; GENERIC-NEXT: movl %eax, %ecx
-; GENERIC-NEXT: LBB22_2:
; GENERIC-NEXT: movb %cl, (%rsi)
; GENERIC-NEXT: retq
;
@@ -538,10 +540,12 @@
; ATOM-NEXT: cmovlel %edi, %eax
; ATOM-NEXT: movb $-128, %cl
; ATOM-NEXT: cmpl $-128, %eax
-; ATOM-NEXT: jl LBB22_2
-; ATOM-NEXT: ## BB#1:
+; ATOM-NEXT: jge LBB22_1
+; ATOM-NEXT: ## BB#2:
+; ATOM-NEXT: movb %cl, (%rsi)
+; ATOM-NEXT: retq
+; ATOM-NEXT: LBB22_1:
; ATOM-NEXT: movl %eax, %ecx
-; ATOM-NEXT: LBB22_2:
; ATOM-NEXT: movb %cl, (%rsi)
; ATOM-NEXT: retq
%cmp = icmp sgt i32 %src, 127
Index: test/CodeGen/X86/sext-i1.ll
===================================================================
--- test/CodeGen/X86/sext-i1.ll
+++ test/CodeGen/X86/sext-i1.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: llc < %s -mtriple=i386-unknown-unknown -disable-cgp-branch-opts | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-cgp-branch-opts | FileCheck %s --check-prefix=CHECK --check-prefix=X64
@@ -6,24 +7,34 @@
; PR6146
define i32 @t1(i32 %x) nounwind readnone ssp {
-; CHECK-LABEL: t1:
-; CHECK: # BB#0:
-; CHECK-NEXT: cmpl $1
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: ret
+; X32-LABEL: t1:
+; X32: # BB#0:
+; X32-NEXT: cmpl $1, {{[0-9]+}}(%esp)
+; X32-NEXT: sbbl %eax, %eax
+; X32-NEXT: retl
;
+; X64-LABEL: t1:
+; X64: # BB#0:
+; X64-NEXT: cmpl $1, %edi
+; X64-NEXT: sbbl %eax, %eax
+; X64-NEXT: retq
%t0 = icmp eq i32 %x, 0
%if = select i1 %t0, i32 -1, i32 0
ret i32 %if
}
define i32 @t2(i32 %x) nounwind readnone ssp {
-; CHECK-LABEL: t2:
-; CHECK: # BB#0:
-; CHECK-NEXT: cmpl $1
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: ret
+; X32-LABEL: t2:
+; X32: # BB#0:
+; X32-NEXT: cmpl $1, {{[0-9]+}}(%esp)
+; X32-NEXT: sbbl %eax, %eax
+; X32-NEXT: retl
;
+; X64-LABEL: t2:
+; X64: # BB#0:
+; X64-NEXT: cmpl $1, %edi
+; X64-NEXT: sbbl %eax, %eax
+; X64-NEXT: retq
%t0 = icmp eq i32 %x, 0
%if = sext i1 %t0 to i32
ret i32 %if
@@ -46,7 +57,6 @@
; X64-NEXT: cmpq %rax, %rax
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
-;
entry:
%not.tobool = icmp eq i32 undef, 0
%cond = sext i1 %not.tobool to i32
@@ -69,10 +79,11 @@
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl $-1, %eax
-; X32-NEXT: je .LBB3_2
-; X32-NEXT: # BB#1:
+; X32-NEXT: jne .LBB3_1
+; X32-NEXT: # BB#2:
+; X32-NEXT: retl
+; X32-NEXT: .LBB3_1:
; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: .LBB3_2:
; X32-NEXT: retl
;
; X64-LABEL: t4:
; X64: # BB#0:
; X64-NEXT: cmpq $1, %rdi
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: retq
-;
%t0 = icmp eq i64 %x, 0
%t1 = sext i1 %t0 to i32
ret i32 %t1
@@ -99,7 +109,6 @@
; X64-NEXT: cmpl $1, %edi
; X64-NEXT: sbbq %rax, %rax
; X64-NEXT: retq
-;
%t0 = icmp eq i32 %x, 0
%t1 = sext i1 %t0 to i64
ret i64 %t1
Index: test/CodeGen/X86/shift-double.ll
===================================================================
--- test/CodeGen/X86/shift-double.ll
+++ test/CodeGen/X86/shift-double.ll
@@ -14,11 +14,13 @@
; CHECK-NEXT: shll %cl, %eax
; CHECK-NEXT: shldl %cl, %esi, %edx
; CHECK-NEXT: testb $32, %cl
-; CHECK-NEXT: je .LBB0_2
-; CHECK-NEXT: # BB#1:
+; CHECK-NEXT: jne .LBB0_1
+; CHECK-NEXT: # BB#2:
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: movl %eax, %edx
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
%shift.upgrd.1 = zext i8 %C to i64 ; [#uses=1]
@@ -37,12 +39,14 @@
; CHECK-NEXT: sarl %cl, %edx
; CHECK-NEXT: shrdl %cl, %esi, %eax
; CHECK-NEXT: testb $32, %cl
-; CHECK-NEXT: je .LBB1_2
-; CHECK-NEXT: # BB#1:
+; CHECK-NEXT: jne .LBB1_1
+; CHECK-NEXT: # BB#2:
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB1_1:
; CHECK-NEXT: sarl $31, %esi
; CHECK-NEXT: movl %edx, %eax
; CHECK-NEXT: movl %esi, %edx
-; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
%shift.upgrd.2 = zext i8 %C to i64 ; [#uses=1]
@@ -61,11 +65,13 @@
; CHECK-NEXT: shrl %cl, %edx
; CHECK-NEXT: shrdl %cl, %esi, %eax
; CHECK-NEXT: testb $32, %cl
-; CHECK-NEXT: je .LBB2_2
-; CHECK-NEXT: # BB#1:
+; CHECK-NEXT: jne .LBB2_1
+; CHECK-NEXT: # BB#2:
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB2_1:
; CHECK-NEXT: movl %edx, %eax
; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: .LBB2_2:
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
%shift.upgrd.3 = zext i8 %C to i64 ; [#uses=1]
Index: test/CodeGen/X86/shrink-wrap-chkstk.ll
===================================================================
--- test/CodeGen/X86/shrink-wrap-chkstk.ll
+++ test/CodeGen/X86/shrink-wrap-chkstk.ll
@@ -62,11 +62,12 @@
; CHECK-LABEL: @use_eax_before_prologue@8: # @use_eax_before_prologue
; CHECK: movl %ecx, %eax
; CHECK: cmpl %edx, %eax
-; CHECK: jge LBB1_2
+; CHECK: jl LBB1_1
+; CHECK: retl
+; CHECK: LBB1_1:
; CHECK: pushl %eax
; CHECK: movl $4092, %eax
; CHECK: calll __chkstk
; CHECK: movl 4092(%esp), %eax
; CHECK: calll _doSomething
-; CHECK: LBB1_2:
; CHECK: retl
Index: test/CodeGen/X86/sink-hoist.ll
===================================================================
--- test/CodeGen/X86/sink-hoist.ll
+++ test/CodeGen/X86/sink-hoist.ll
@@ -26,7 +26,8 @@
; CHECK-LABEL: split:
; CHECK-NEXT: testb $1, %dil
-; CHECK-NEXT: je
+; CHECK-NEXT: jne
+; CHECK: ret
; CHECK: divsd
; CHECK: movapd
; CHECK: ret
Index: test/CodeGen/X86/sse-scalar-fp-arith.ll
===================================================================
--- test/CodeGen/X86/sse-scalar-fp-arith.ll
+++ test/CodeGen/X86/sse-scalar-fp-arith.ll
@@ -1110,10 +1110,12 @@
; AVX1-LABEL: add_ss_mask:
; AVX1: # BB#0:
; AVX1-NEXT: testb $1, %dil
-; AVX1-NEXT: je .LBB62_2
-; AVX1-NEXT: # BB#1:
+; AVX1-NEXT: jne .LBB62_1
+; AVX1-NEXT: # BB#2:
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
+; AVX1-NEXT: retq
+; AVX1-NEXT: .LBB62_1:
; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: .LBB62_2:
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; AVX1-NEXT: retq
;
@@ -1165,10 +1167,12 @@
; AVX1-LABEL: add_sd_mask:
; AVX1: # BB#0:
; AVX1-NEXT: testb $1, %dil
-; AVX1-NEXT: je .LBB63_2
-; AVX1-NEXT: # BB#1:
+; AVX1-NEXT: jne .LBB63_1
+; AVX1-NEXT: # BB#2:
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; AVX1-NEXT: retq
+; AVX1-NEXT: .LBB63_1:
; AVX1-NEXT: vaddsd %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: .LBB63_2:
; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; AVX1-NEXT: retq
;
Index: test/CodeGen/X86/testb-je-fusion.ll
===================================================================
--- test/CodeGen/X86/testb-je-fusion.ll
+++ test/CodeGen/X86/testb-je-fusion.ll
@@ -9,7 +9,7 @@
entry:
%and = and i32 %flags, 512
%tobool = icmp eq i32 %and, 0
- br i1 %tobool, label %if.end, label %if.then
+ br i1 %tobool, label %if.end, label %if.then, !prof !1
if.then:
br label %if.end
@@ -18,3 +18,4 @@
%hasflag = phi i32 [ 1, %if.then ], [ 0, %entry ]
ret i32 %hasflag
}
+!1 = !{!"branch_weights", i32 1, i32 2}
Index: test/CodeGen/X86/use-add-flags.ll
===================================================================
--- test/CodeGen/X86/use-add-flags.ll
+++ test/CodeGen/X86/use-add-flags.ll
@@ -43,7 +43,7 @@
; CHECK-LABEL: test3:
; CHECK: andl $16, %e
-; CHECK-NEXT: jne
+; CHECK-NEXT: je
define void @test3(i32 %x) nounwind {
%y = and i32 %x, 16
Index: test/CodeGen/X86/vec_int_to_fp.ll
===================================================================
--- test/CodeGen/X86/vec_int_to_fp.ll
+++ test/CodeGen/X86/vec_int_to_fp.ll
@@ -1663,10 +1663,12 @@
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: testq %rax, %rax
-; VEX-NEXT: js .LBB39_8
-; VEX-NEXT: # BB#7:
+; VEX-NEXT: jns .LBB39_7
+; VEX-NEXT: # BB#8:
+; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT: retq
+; VEX-NEXT: .LBB39_7:
; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
-; VEX-NEXT: .LBB39_8:
; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; VEX-NEXT: retq
;
@@ -1898,10 +1900,12 @@
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: testq %rax, %rax
-; VEX-NEXT: js .LBB41_8
-; VEX-NEXT: # BB#7:
+; VEX-NEXT: jns .LBB41_7
+; VEX-NEXT: # BB#8:
+; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT: retq
+; VEX-NEXT: .LBB41_7:
; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
-; VEX-NEXT: .LBB41_8:
; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; VEX-NEXT: retq
;
Index: test/CodeGen/X86/x86-shrink-wrap-unwind.ll
===================================================================
--- test/CodeGen/X86/x86-shrink-wrap-unwind.ll
+++ test/CodeGen/X86/x86-shrink-wrap-unwind.ll
@@ -24,8 +24,12 @@
; After the prologue is set.
; CHECK: movl %edi, [[ARG0CPY:%e[a-z]+]]
; CHECK-NEXT: cmpl %esi, [[ARG0CPY]]
-; CHECK-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
+; CHECK-NEXT: jl [[BODY_LABEL:LBB[0-9_]+]]
+
+; CHECK: popq
+; CHECK-NEXT: retq
;
+; CHECK: [[BODY_LABEL]]:
; Store %a in the alloca.
; CHECK: movl [[ARG0CPY]], 4(%rsp)
; Set the alloca address in the second argument.
@@ -34,7 +38,6 @@
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq _doSomething
;
-; CHECK: [[EXIT_LABEL]]:
;
; Without shrink-wrapping, epilogue is in the exit block.
; Epilogue code. (What we pop does not matter.)
@@ -70,8 +73,10 @@
; After the prologue is set.
; CHECK: movl %edi, [[ARG0CPY:%e[a-z]+]]
; CHECK-NEXT: cmpl %esi, [[ARG0CPY]]
-; CHECK-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
+; CHECK-NEXT: jl [[BODY_LABEL:LBB[0-9_]+]]
+; CHECK: retq
;
+; [[BODY_LABEL]]:
; Prologue code.
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
@@ -86,8 +91,6 @@
;
; Epilogue code. (What we pop does not matter.)
 ; CHECK: popq %rbp
-;
-; CHECK: [[EXIT_LABEL]]:
 ; CHECK-NEXT: retq
 define i32 @frameUnwind(i32 %a, i32 %b) #1 {
 %tmp = alloca i32, align 4
@@ -116,8 +119,10 @@
 ; After the prologue is set.
 ; CHECK: movl %edi, [[ARG0CPY:%e[a-z]+]]
 ; CHECK-NEXT: cmpl %esi, [[ARG0CPY]]
-; CHECK-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
+; CHECK-NEXT: jl [[BODY_LABEL:LBB[0-9_]+]]
+; CHECK: retq
 ;
+; CHECK: [[BODY_LABEL]]:
 ; Prologue code.
 ; (What we push does not matter. It should be some random sratch register.)
 ; CHECK: pushq
@@ -132,8 +137,6 @@
 ;
 ; Epilogue code.
 ; CHECK-NEXT: addq
-;
-; CHECK: [[EXIT_LABEL]]:
 ; CHECK-NEXT: retq
 define i32 @framelessnoUnwind(i32 %a, i32 %b) #2 {
 %tmp = alloca i32, align 4
Index: test/CodeGen/X86/x86-shrink-wrapping.ll
===================================================================
--- test/CodeGen/X86/x86-shrink-wrapping.ll
+++ test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -18,8 +18,10 @@
 ; No prologue needed.
 ; ENABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
 ; ENABLE-NEXT: cmpl %esi, [[ARG0CPY]]
-; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
+; ENABLE-NEXT: jl [[BODY_LABEL:LBB[0-9_]+]]
 ;
+; ENABLE: retq
+; ENABLE: [[BODY_LABEL]]:
 ; Prologue code.
 ; (What we push does not matter. It should be some random sratch register.)
 ; CHECK: pushq
@@ -28,7 +30,9 @@
 ; After the prologue is set.
 ; DISABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
 ; DISABLE-NEXT: cmpl %esi, [[ARG0CPY]]
-; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
+; DISABLE-NEXT: jl [[BODY_LABEL:LBB[0-9_]+]]
+; DISABLE: retq
+; DISABLE: [[BODY_LABEL]]:
 ;
 ; Store %a in the alloca.
 ; CHECK: movl [[ARG0CPY]], 4(%rsp)
@@ -41,12 +45,9 @@
 ; With shrink-wrapping, epilogue is just after the call.
 ; ENABLE-NEXT: addq $8, %rsp
 ;
-; CHECK: [[EXIT_LABEL]]:
-;
 ; Without shrink-wrapping, epilogue is in the exit block.
 ; Epilogue code. (What we pop does not matter.)
 ; DISABLE-NEXT: popq
-;
 ; CHECK-NEXT: retq
 define i32 @foo(i32 %a, i32 %b) {
 %tmp = alloca i32, align 4
Index: test/CodeGen/XCore/codemodel.ll
===================================================================
--- test/CodeGen/XCore/codemodel.ll
+++ test/CodeGen/XCore/codemodel.ll
@@ -22,9 +22,11 @@
 ; LARGE: ldaw r11, cp[.LCPI{{[0-9_]*}}]
 ; LARGE: mov r1, r11
 ; LARGE: ldaw r11, cp[.LCPI{{[0-9_]*}}]
-; LARGE: bt r0, [[JUMP:.LBB[0-9_]*]]
-; LARGE: mov r11, r1
+; LARGE: bf r0, [[JUMP:.LBB[0-9_]*]]
+; LARGE: ldw r0, r11[0]
+; LARGE: retsp 0
 ; LARGE: [[JUMP]]
+; LARGE: mov r11, r1
 ; LARGE: ldw r0, r11[0]
 ; LARGE: retsp 0
 @A1 = external global [50000 x i32]
Index: test/DebugInfo/X86/bbjoin.ll
===================================================================
--- test/DebugInfo/X86/bbjoin.ll
+++ test/DebugInfo/X86/bbjoin.ll
@@ -13,11 +13,12 @@
 ; CHECK: bb.0.entry:
 ; CHECK: DBG_VALUE 23, 0, ![[X]],
 ; CHECK: DBG_VALUE %rsp, 4, ![[X]]
-; CHECK: bb.1.if.then:
-; CHECK: DBG_VALUE 43, 0, ![[X]],
 ; CHECK: bb.2.if.end:
 ; CHECK-NOT: DBG_VALUE 23, 0, ![[X]],
 ; CHECK: RETQ %eax
+; CHECK: bb.1.if.then:
+; CHECK: DBG_VALUE 43, 0, ![[X]],
+; CHECK: RETQ %eax
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.11.0"