Index: lib/CodeGen/IfConversion.cpp =================================================================== --- lib/CodeGen/IfConversion.cpp +++ lib/CodeGen/IfConversion.cpp @@ -58,6 +58,8 @@ cl::init(false), cl::Hidden); static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond", cl::init(false), cl::Hidden); +static cl::opt<bool> DisableDiamondTail("disable-ifcvt-diamond-tail", + cl::init(false), cl::Hidden); static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold", cl::init(true), cl::Hidden); @@ -68,6 +70,7 @@ STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed"); STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed"); STATISTIC(NumDiamonds, "Number of diamond if-conversions performed"); +STATISTIC(NumDiamondTails, "Number of diamond-tail if-conversions performed"); STATISTIC(NumIfConvBBs, "Number of if-converted blocks"); STATISTIC(NumDupBBs, "Number of duplicated blocks"); STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated"); @@ -82,7 +85,9 @@ ICTriangleRev, // Same as ICTriangle, but true path rev condition. ICTriangleFalse, // Same as ICTriangle, but on the false path. ICTriangle, // BB is entry of a triangle sub-CFG. - ICDiamond // BB is entry of a diamond sub-CFG. + ICDiamond, // BB is entry of a diamond sub-CFG. + ICDiamondTail // BB is entry of an almost diamond sub-CFG, with a + // shared tail.
}; /// BBInfo - One per MachineBasicBlock, this is used to cache the result @@ -114,6 +119,7 @@ bool IsAnalyzed : 1; bool IsEnqueued : 1; bool IsBrAnalyzable : 1; + bool IsBrReversible : 1; bool HasFallThrough : 1; bool IsUnpredicable : 1; bool CannotBeCopied : 1; @@ -128,9 +134,10 @@ SmallVector<MachineOperand, 4> Predicate; BBInfo() : IsDone(false), IsBeingAnalyzed(false), IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), - HasFallThrough(false), IsUnpredicable(false), - CannotBeCopied(false), ClobbersPred(false), NonPredSize(0), - ExtraCost(0), ExtraCost2(0), BB(nullptr), TrueBB(nullptr), + IsBrReversible(false), HasFallThrough(false), + IsUnpredicable(false), CannotBeCopied(false), + ClobbersPred(false), NonPredSize(0), ExtraCost(0), + ExtraCost2(0), BB(nullptr), TrueBB(nullptr), FalseBB(nullptr) {} }; @@ -148,11 +155,15 @@ struct IfcvtToken { BBInfo &BBI; IfcvtKind Kind; - bool NeedSubsumption; unsigned NumDups; unsigned NumDups2; - IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0) - : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {} + bool NeedSubsumption : 1; + bool TClobbersPred : 1; + bool FClobbersPred : 1; + IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0, + bool tc = false, bool fc = false) + : BBI(b), Kind(k), NumDups(d), NumDups2(d2), NeedSubsumption(s), + TClobbersPred(tc), FClobbersPred(fc) {} }; /// BBAnalysis - Results of if-conversion feasibility analysis indexed by @@ -203,19 +214,39 @@ BranchProbability Prediction) const; bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, unsigned &Dups1, unsigned &Dups2) const; + bool ValidDiamondTail(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2, + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const; + bool RecalculateCostsAndClobbers(MachineBasicBlock::iterator &BIB, + MachineBasicBlock::iterator &BIE, + BBInfo &BBIRecalc) const; void ScanInstructions(BBInfo &BBI); void AnalyzeBlock(MachineBasicBlock *MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens); bool
FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond, bool isTriangle = false, bool RevBranch = false); + // Perform Feasibility Analysis, assuming that BBI contains a shared tail. + // This disregards IsUnpredicable, as the tail may contain unpredicable + // instructions, but may be shared. It is assumed that the caller has + // verified this. + bool FeasibilityAnalysisSharedTail( + BBInfo &BBI, SmallVectorImpl<MachineOperand> &Pred); void AnalyzeBlocks(MachineFunction &MF, std::vector<std::unique_ptr<IfcvtToken>> &Tokens); void InvalidatePreds(MachineBasicBlock *BB); void RemoveExtraEdges(BBInfo &BBI); bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind); + bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned NumDups1, unsigned NumDups2, + bool TClobbersPred, bool FClobbersPred, + bool RemoveTrueBranch, bool RemoveFalseBranch, + bool MergeAddEdges); bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, unsigned NumDups1, unsigned NumDups2); + bool IfConvertDiamondTail(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2, + bool TClobbersPred, bool FClobbersPred); void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl<MachineOperand> &Cond, @@ -407,6 +438,18 @@ if (RetVal) ++NumDiamonds; break; } + case ICDiamondTail: { + if (DisableDiamondTail) break; + DEBUG(dbgs() << "Ifcvt (Diamond w/ tail): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); + RetVal = IfConvertDiamondTail(BBI, Kind, NumDups, NumDups2, + Token->TClobbersPred, + Token->FClobbersPred); + DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + if (RetVal) ++NumDiamondTails; + break; + } } Change |= RetVal; @@ -451,7 +494,10 @@ /// ReverseBranchCondition - Reverse the condition of the end of the block /// branch. Swap block's 'true' and 'false' successors.
bool IfConverter::ReverseBranchCondition(BBInfo &BBI) const { - DebugLoc dl; // FIXME: this is nowhere + DebugLoc dl; + MachineBasicBlock::iterator BBIT = BBI.BB->getFirstTerminator(); + if (BBIT != BBI.BB->end()) + dl = BBIT->getDebugLoc(); if (!TII->ReverseBranchCondition(BBI.BrCond)) { TII->RemoveBranch(*BBI.BB); TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl); @@ -625,6 +671,124 @@ } } +/// ValidDiamondTail - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) form a diamond if a common tail block is +/// extracted +bool IfConverter::ValidDiamondTail( + BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2, + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const { + Dups1 = Dups2 = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone || + FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) + return false; + + MachineBasicBlock *TT = TrueBBI.TrueBB; + MachineBasicBlock *TF = TrueBBI.FalseBB; + MachineBasicBlock *FT = FalseBBI.TrueBB; + MachineBasicBlock *FF = FalseBBI.FalseBB; + + if (!TrueBBI.IsBrAnalyzable || !FalseBBI.IsBrAnalyzable) + return false; + + if (!TT) + TT = getNextBlock(TrueBBI.BB); + if (!TF) + TF = getNextBlock(TrueBBI.BB); + if (!FT) + FT = getNextBlock(FalseBBI.BB); + if (!FF) + FF = getNextBlock(FalseBBI.BB); + + if (!TT || !TF) + return false; + if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) + return false; + + // Only looking for the case where it's not an actual diamond. + if (TT == TF || FT == FF) + return false; + + // Check successors. If they don't match, bail. + if (!((TT == FT && TF == FF) || (TF == FT && TT == FF))) + return false; + + if (TF == FT && TT == FF) { + // If the branches are opposing, but we can't reverse, don't do it. + if (!FalseBBI.IsBrReversible) + return false; + ReverseBranchCondition(FalseBBI); + } + + // Count duplicate instructions at the beginning of the true and false blocks. 
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); + MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); + MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); + countDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2, + *TrueBBI.BB, *FalseBBI.BB, + /* SkipConditionalBranches */ false); + + // TIE and FIE both point at the last instruction; advance them past it. + ++TIE; ++FIE; + // The sizes of the blocks are unchanged. + TrueBBICalc.NonPredSize = TrueBBI.NonPredSize; + FalseBBICalc.NonPredSize = FalseBBI.NonPredSize; + // We only count extra cost for instructions that aren't shared. + TrueBBICalc.ExtraCost = TrueBBICalc.ExtraCost2 = 0; + FalseBBICalc.ExtraCost = FalseBBICalc.ExtraCost2 = 0; + TrueBBICalc.ClobbersPred = false; + FalseBBICalc.ClobbersPred = false; + if (!RecalculateCostsAndClobbers(TIB, TIE, TrueBBICalc)) + return false; + if (!RecalculateCostsAndClobbers(FIB, FIE, FalseBBICalc)) + return false; + if (TrueBBICalc.ClobbersPred && FalseBBICalc.ClobbersPred) + return false; + return true; +} + +/// Run through the non-duplicated portion of a block and recalculate the cost +/// of predicating it and whether the non-duplicated portion clobbers the +/// predicate info. +/// @param BIB points to the first non-shared instruction. +/// @param BIE points to the first shared instruction in the tail, or End() if +/// no instructions are shared. +/// @param BBIRecalc a BBInfo structure to hold the results of recalculating. +/// @return true if the block can be predicated. +bool IfConverter::RecalculateCostsAndClobbers(MachineBasicBlock::iterator &BIB, + MachineBasicBlock::iterator &BIE, + BBInfo &BBIRecalc) const { + std::vector<MachineOperand> PredDefs; + while (BIB != BIE) { + // Skip dbg_value instructions. These do not count.
+ if (BIB->isDebugValue()) { + while (BIB != BIE && BIB->isDebugValue()) + ++BIB; + if (BIB == BIE) + break; + } + // A Cond-clobbering instruction can only occur at the end of the + // non-duplicated section. + if (BBIRecalc.ClobbersPred) + return false; + if (TII->isPredicated(*BIB)) + return false; + if (TII->DefinesPredicate(*BIB, PredDefs)) + BBIRecalc.ClobbersPred = true; + if (BIB->isBranch()) + return false; + if (!TII->isPredicable(*BIB)) + return false; + unsigned ExtraPredCost = TII->getPredicationCost(*BIB); + unsigned NumCycles = SchedModel.computeInstrLatency(&(*BIB), false); + if (NumCycles > 1) + BBIRecalc.ExtraCost += NumCycles-1; + BBIRecalc.ExtraCost2 += ExtraPredCost; + ++BIB; + } + return true; +} /// ValidDiamond - Returns true if the 'true' and 'false' blocks (along /// with their common predecessor) forms a valid diamond shape for ifcvt. bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, @@ -680,6 +844,9 @@ BBI.BrCond.clear(); BBI.IsBrAnalyzable = !TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); + SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); + BBI.IsBrReversible = (RevCond.size() == 0) || + !TII->ReverseBranchCondition(RevCond); BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr; if (BBI.BrCond.size()) { @@ -819,6 +986,31 @@ return true; } +/// FeasibilityAnalysisSharedTail - Determine if the block is a suitable +/// candidate to be predicated by the specified predicate, assuming that all +/// non-predicable instructions are part of a shared tail. +bool IfConverter::FeasibilityAnalysisSharedTail( + BBInfo &BBI, SmallVectorImpl<MachineOperand> &Pred) { + // If the block is dead, then it cannot be predicated. Don't check + // IsUnpredicable, because while the whole block may not be, the portion that + // is unshared may well be predicable.
+ if (BBI.IsDone) + return false; + + // If it is already predicated but we couldn't analyze its terminator, the + // latter might fall through, but we can't determine where to. + // Conservatively avoid if-converting again. + if (BBI.Predicate.size() && !BBI.IsBrAnalyzable) + return false; + + // If it is already predicated, check if the new predicate subsumes + // its predicate. + if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate)) + return false; + + return true; +} + /// AnalyzeBlock - Analyze the structure of the sub-CFG starting from /// the specified block. Record its successors and whether it looks like an /// if-conversion candidate. @@ -926,6 +1118,34 @@ Enqueued = true; } + BBInfo TrueBBICalc, FalseBBICalc; + if (CanRevCond && ValidDiamondTail(TrueBBI, FalseBBI, Dups, Dups2, + TrueBBICalc, FalseBBICalc) && + MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) + + TrueBBICalc.ExtraCost), + TrueBBICalc.ExtraCost2, + *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) + + FalseBBICalc.ExtraCost), + FalseBBICalc.ExtraCost2, + Prediction) && + FeasibilityAnalysisSharedTail(TrueBBI, BBI.BrCond) && + FeasibilityAnalysisSharedTail(FalseBBI, RevCond)) { + // DiamondTail: + // if TBB and FBB have a common tail that includes their conditional + // branch instructions, then we can If Convert this pattern. + // EBB + // _/ \_ + // | | + // TBB FBB + // / \ / \ + // FalseBB TrueBB FalseBB + // + Tokens.push_back(llvm::make_unique<IfcvtToken>( + BBI, ICDiamondTail, TNeedSub | FNeedSub, Dups, Dups2, + (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred)); + Enqueued = true; + } + if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) && MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, TrueBBI.ExtraCost2, Prediction) && @@ -1363,23 +1583,24 @@ return true; } -/// IfConvertDiamond - If convert a diamond sub-CFG.
-/// -bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, - unsigned NumDups1, unsigned NumDups2) { - BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; - BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; - MachineBasicBlock *TailBB = TrueBBI.TrueBB; - // True block must fall through or end with an unanalyzable terminator. - if (!TailBB) { - if (blockAlwaysFallThrough(TrueBBI)) - TailBB = FalseBBI.TrueBB; - assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!"); - } - - if (TrueBBI.IsDone || FalseBBI.IsDone || - TrueBBI.BB->pred_size() > 1 || - FalseBBI.BB->pred_size() > 1) { +/// IfConvertDiamondCommon - Common code shared between diamond conversions. +/// BBI, TrueBBI, and FalseBBI form the diamond shape. +/// NumDups1 - number of shared instructions at the beginning of TrueBBI and +/// FalseBBI +/// NumDups2 - number of shared instructions at the end of TrueBBI and FalseBBI +/// RemoveTrueBranch - Remove the branch of the true block before predicating +/// Only false for unanalyzable fallthrough cases. +/// RemoveFalseBranch - Remove the branch of the false block before predicating +/// Only false for unanalyzable fallthrough cases. +/// MergeAddEdges - Add successor edges when merging blocks. Only false for +/// unanalyzable fallthrough +bool IfConverter::IfConvertDiamondCommon( + BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned NumDups1, unsigned NumDups2, + bool TClobbersPred, bool FClobbersPred, + bool RemoveTrueBranch, bool RemoveFalseBranch, + bool MergeAddEdges) { + if (TrueBBI.IsDone || FalseBBI.IsDone) { // Something has changed. It's no longer safe to predicate these blocks. BBI.IsAnalyzed = false; TrueBBI.IsAnalyzed = false; @@ -1404,15 +1625,16 @@ // Figure out the more profitable ordering. 
bool DoSwap = false; - if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred) + if (TClobbersPred && !FClobbersPred) DoSwap = true; - else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) { + else if (TClobbersPred == FClobbersPred) { if (TrueBBI.NonPredSize > FalseBBI.NonPredSize) DoSwap = true; } if (DoSwap) { std::swap(BBI1, BBI2); std::swap(Cond1, Cond2); + std::swap(RemoveTrueBranch, RemoveFalseBranch); } // Remove the conditional branch from entry to the blocks. @@ -1459,11 +1681,7 @@ BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); - // Remove branch from the 'true' block, unless it was not analyzable. - // Non-analyzable branches need to be preserved, since in such cases, - // the CFG structure is not an actual diamond (the join block may not - // be present). - if (BBI1->IsBrAnalyzable) + if (RemoveTrueBranch) BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); // Remove duplicated instructions. DI1 = BBI1->BB->end(); @@ -1482,9 +1700,9 @@ // must be removed. RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI); - // Remove 'false' block branch (unless it was not analyzable), and find - // the last instruction to predicate. - if (BBI2->IsBrAnalyzable) + // Remove the 'false' block branch if RemoveFalseBranch is set, and find the + // last instruction to predicate. + if (RemoveFalseBranch) BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); DI2 = BBI2->BB->end(); while (NumDups2 != 0) { @@ -1560,8 +1778,81 @@ PredicateBlock(*BBI2, DI2, *Cond2); // Merge the true block into the entry of the diamond. - MergeBlocks(BBI, *BBI1, TailBB == nullptr); - MergeBlocks(BBI, *BBI2, TailBB == nullptr); + MergeBlocks(BBI, *BBI1, MergeAddEdges); + MergeBlocks(BBI, *BBI2, MergeAddEdges); + return true; +} + +/// IfConvertDiamondTail - If convert an almost-diamond sub-CFG where the true +/// and false blocks share a common tail.
+bool IfConverter::IfConvertDiamondTail( + BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2, + bool TClobbersPred, bool FClobbersPred) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + + // Save the debug location for later. + DebugLoc dl; + MachineBasicBlock::iterator TIE = TrueBBI.BB->getFirstTerminator(); + if (TIE != TrueBBI.BB->end()) + dl = TIE->getDebugLoc(); + // Removing branches from both blocks is safe, because we have already + // determined that both blocks have the same branch instructions. The branch + // will be added back at the end, unpredicated. + if (!IfConvertDiamondCommon( + BBI, TrueBBI, FalseBBI, + NumDups1, NumDups2, + TClobbersPred, FClobbersPred, + /* RemoveTrueBranch */ true, /* RemoveFalseBranch */ true, + /* MergeAddEdges */ true)) + return false; + + // Add back the branch. + // The debug location was saved above from TrueBBI's first terminator. + TII->InsertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB, + TrueBBI.BrCond, dl); + + RemoveExtraEdges(BBI); + + // Update block info. + BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; + InvalidatePreds(BBI.BB); + + // FIXME: Must maintain LiveIns. + return true; +} + +/// IfConvertDiamond - If convert a diamond sub-CFG. +/// +bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + MachineBasicBlock *TailBB = TrueBBI.TrueBB; + + // True block must fall through or end with an unanalyzable terminator.
+ if (!TailBB) { + if (blockAlwaysFallThrough(TrueBBI)) + TailBB = FalseBBI.TrueBB; + assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!"); + } + + if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) { + BBI.IsAnalyzed = false; + TrueBBI.IsAnalyzed = false; + FalseBBI.IsAnalyzed = false; + return false; + } + + if (!IfConvertDiamondCommon( + BBI, TrueBBI, FalseBBI, + NumDups1, NumDups2, + TrueBBI.ClobbersPred, FalseBBI.ClobbersPred, + /* RemoveTrueBranch */ TrueBBI.IsBrAnalyzable, + /* RemoveFalseBranch */ FalseBBI.IsBrAnalyzable, + /* MergeAddEdges */ TailBB == nullptr)) + return false; // If the if-converted block falls through or unconditionally branches into // the tail block, and the tail block does not have other predecessors, then @@ -1584,7 +1875,7 @@ CanMergeTail = false; else if (NumPreds == 1 && CanMergeTail) { MachineBasicBlock::pred_iterator PI = TailBB->pred_begin(); - if (*PI != BBI1->BB && *PI != BBI2->BB) + if (*PI != TrueBBI.BB && *PI != FalseBBI.BB) CanMergeTail = false; } if (CanMergeTail) { @@ -1600,8 +1891,8 @@ // RemoveExtraEdges won't work if the block has an unanalyzable branch, // which can happen here if TailBB is unanalyzable and is merged, so // explicitly remove BBI1 and BBI2 as successors. - BBI.BB->removeSuccessor(BBI1->BB); - BBI.BB->removeSuccessor(BBI2->BB, true); + BBI.BB->removeSuccessor(TrueBBI.BB); + BBI.BB->removeSuccessor(FalseBBI.BB, /* NormalizeSuccessProbs */ true); RemoveExtraEdges(BBI); // Update block info. 
Index: test/CodeGen/Thumb2/thumb2-ifcvt1.ll =================================================================== --- test/CodeGen/Thumb2/thumb2-ifcvt1.ll +++ test/CodeGen/Thumb2/thumb2-ifcvt1.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-default-it | FileCheck %s -; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it |FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it -enable-tail-merge=0 | FileCheck %s define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK-LABEL: t1: ; CHECK: ittt ne @@ -25,9 +26,9 @@ define i32 @t2(i32 %a, i32 %b) nounwind { entry: ; CHECK-LABEL: t2: -; CHECK: ite gt -; CHECK: subgt -; CHECK: suble +; CHECK: ite {{gt|le}} +; CHECK-DAG: suble +; CHECK-DAG: subgt %tmp1434 = icmp eq i32 %a, %b ; [#uses=1] br i1 %tmp1434, label %bb17, label %bb.outer