Index: lib/CodeGen/IfConversion.cpp =================================================================== --- lib/CodeGen/IfConversion.cpp +++ lib/CodeGen/IfConversion.cpp @@ -58,6 +58,8 @@ cl::init(false), cl::Hidden); static cl::opt DisableDiamond("disable-ifcvt-diamond", cl::init(false), cl::Hidden); +static cl::opt DisableDiamondTail("disable-ifcvt-diamond-tail", + cl::init(false), cl::Hidden); static cl::opt IfCvtBranchFold("ifcvt-branch-fold", cl::init(true), cl::Hidden); @@ -68,6 +70,7 @@ STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed"); STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed"); STATISTIC(NumDiamonds, "Number of diamond if-conversions performed"); +STATISTIC(NumDiamondTails, "Number of diamond-tail if-conversions performed"); STATISTIC(NumIfConvBBs, "Number of if-converted blocks"); STATISTIC(NumDupBBs, "Number of duplicated blocks"); STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated"); @@ -82,7 +85,9 @@ ICTriangleRev, // Same as ICTriangle, but true path rev condition. ICTriangleFalse, // Same as ICTriangle, but on the false path. ICTriangle, // BB is entry of a triangle sub-CFG. - ICDiamond // BB is entry of a diamond sub-CFG. + ICDiamond, // BB is entry of a diamond sub-CFG. + ICDiamondTail // BB is entry of an almost diamond sub-CFG, with a + // shared tail. }; /// BBInfo - One per MachineBasicBlock, this is used to cache the result @@ -114,6 +119,7 @@ bool IsAnalyzed : 1; bool IsEnqueued : 1; bool IsBrAnalyzable : 1; + bool IsBrReversible : 1; bool HasFallThrough : 1; bool IsUnpredicable : 1; bool CannotBeCopied : 1; @@ -128,9 +134,10 @@ SmallVector Predicate; BBInfo() : IsDone(false), IsBeingAnalyzed(false), IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), - HasFallThrough(false), IsUnpredicable(false), - CannotBeCopied(false), ClobbersPred(false), NonPredSize(0), - ExtraCost(0), ExtraCost2(0), BB(nullptr), TrueBB(nullptr), + IsBrReversible(false), HasFallThrough(false), + IsUnpredicable(false), CannotBeCopied(false), + ClobbersPred(false), NonPredSize(0), ExtraCost(0), + ExtraCost2(0), BB(nullptr), TrueBB(nullptr), FalseBB(nullptr) {} }; @@ -148,11 +155,15 @@ struct IfcvtToken { BBInfo &BBI; IfcvtKind Kind; - bool NeedSubsumption; unsigned NumDups; unsigned NumDups2; - IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0) - : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {} + bool NeedSubsumption : 1; + bool TClobbersPred : 1; + bool FClobbersPred : 1; + IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0, + bool tc = false, bool fc = false) + : BBI(b), Kind(k), NumDups(d), NumDups2(d2), NeedSubsumption(s), + TClobbersPred(tc), FClobbersPred(fc) {} }; /// BBAnalysis - Results of if-conversion feasibility analysis indexed by @@ -195,7 +206,7 @@ } private: - bool ReverseBranchCondition(BBInfo &BBI); + bool ReverseBranchCondition(BBInfo &BBI) const; bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups, BranchProbability Prediction) const; bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, @@ -203,11 +214,20 @@ BranchProbability Prediction) const; bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, unsigned &Dups1, unsigned &Dups2) const; + bool ValidDiamondTail(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2, + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const; void ScanInstructions(BBInfo &BBI); void AnalyzeBlock(MachineBasicBlock *MBB, std::vector> &Tokens); bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl &Cond, bool isTriangle = false, bool RevBranch = false); + // Perform Feasability Analysis, assuming that BBI contains a shared tail. + // This disregards IsUnpredicable, as the tail may contain unpredicable + // instructions, but may be shared. It is assumed that the caller has + // verified this. + bool FeasibilityAnalysisSharedTail( + BBInfo &BBI, SmallVectorImpl &Pred); void AnalyzeBlocks(MachineFunction &MF, std::vector> &Tokens); void InvalidatePreds(MachineBasicBlock *BB); @@ -216,6 +236,9 @@ bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind); bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, unsigned NumDups1, unsigned NumDups2); + bool IfConvertDiamondTail(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2, + bool TClobbers, bool FClobbers); void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl &Cond, @@ -407,6 +430,18 @@ if (RetVal) ++NumDiamonds; break; } + case ICDiamondTail: { + if (DisableDiamondTail) break; + DEBUG(dbgs() << "Ifcvt (Diamond w/ tail): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); + RetVal = IfConvertDiamondTail(BBI, Kind, NumDups, NumDups2, + Token->TClobbersPred, + Token->FClobbersPred); + DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + if (RetVal) ++NumDiamondTails; + break; + } } Change |= RetVal; @@ -450,8 +485,11 @@ /// ReverseBranchCondition - Reverse the condition of the end of the block /// branch. Swap block's 'true' and 'false' successors. -bool IfConverter::ReverseBranchCondition(BBInfo &BBI) { - DebugLoc dl; // FIXME: this is nowhere +bool IfConverter::ReverseBranchCondition(BBInfo &BBI) const { + DebugLoc dl; + MachineBasicBlock::iterator BBIT = BBI.BB->getFirstTerminator(); + if (BBIT != BBI.BB->end()) + dl = BBIT->getDebugLoc(); if (!TII->ReverseBranchCondition(BBI.BrCond)) { TII->RemoveBranch(*BBI.BB); TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl); @@ -540,6 +578,188 @@ return TExit && TExit == FalseBBI.BB; } +/// ValidDiamondTail - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) form a diamond if a common tail block is +/// extracted +bool IfConverter::ValidDiamondTail( + BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2, + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const { + Dups1 = Dups2 = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone || + FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) + return false; + + MachineBasicBlock *TT = TrueBBI.TrueBB; + MachineBasicBlock *TF = TrueBBI.FalseBB; + MachineBasicBlock *FT = FalseBBI.TrueBB; + MachineBasicBlock *FF = FalseBBI.FalseBB; + + if (!TrueBBI.IsBrAnalyzable || !FalseBBI.IsBrAnalyzable) + return false; + + if (!TT) + TT = getNextBlock(TrueBBI.BB); + if (!TF) + TF = getNextBlock(TrueBBI.BB); + if (!FT) + FT = getNextBlock(FalseBBI.BB); + if (!FF) + FF = getNextBlock(FalseBBI.BB); + + if (!TT || !TF) + return false; + if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) + return false; + + // Only looking for the case where it's not an actual diamond. + if (TT == TF || FT == FF) + return false; + + // Check successors. If they don't match, bail. + if (!((TT == FT && TF == FF) || (TF == FT && TT == FF))) + return false; + + // If the branches are opposing, but we can't reverse, don't do it. + if (TF == FT && TT == FF && !FalseBBI.IsBrReversible) + return false; + if (TF == FT && TT == FF) + ReverseBranchCondition(FalseBBI); + + // add debug statement to show that a pair of blocks has passed the basic + // checks. + + // Count duplicate instructions at the beginning of the true and false blocks. + // While we do this, we calculate the costs of predicating the non-shared + // sections of the blocks. + // The size of the blocks are the same. + TrueBBICalc.NonPredSize = TrueBBI.NonPredSize; + FalseBBICalc.NonPredSize = FalseBBI.NonPredSize; + // We only count extra cost for instructions that aren't shared. + TrueBBICalc.ExtraCost = TrueBBICalc.ExtraCost2 = 0; + FalseBBICalc.ExtraCost = FalseBBICalc.ExtraCost2 = 0; + TrueBBICalc.ClobbersPred = false; + FalseBBICalc.ClobbersPred = false; + MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); + MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); + MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); + std::vector PredDefs; + while (TIB != TIE && FIB != FIE) { + // Skip dbg_value instructions. These do not count. + if (TIB->isDebugValue()) { + while (TIB != TIE && TIB->isDebugValue()) + ++TIB; + if (TIB == TIE) + break; + } + if (FIB->isDebugValue()) { + while (FIB != FIE && FIB->isDebugValue()) + ++FIB; + if (FIB == FIE) + break; + } + if (!TIB->isIdenticalTo(*FIB)) + break; + if (TII->DefinesPredicate(*TIB, PredDefs)) { + TrueBBICalc.ClobbersPred = true; + FalseBBICalc.ClobbersPred = true; + } + ++Dups1; + ++TIB; + ++FIB; + } + + // Now, in preparation for counting duplicate instructions at the ends of the + // blocks, move the end iterators up past any unconditional branch + // instructions. + // Check for already containing all of the block. + if (TIB == TIE || FIB == FIE) + return true; + --TIE; + --FIE; + while (TIE != TIB && TIE->isUnconditionalBranch()) + --TIE; + while (FIE != FIB && FIE->isUnconditionalBranch()) + --FIE; + + // If Dups1 includes all of a block, then don't count duplicate + // instructions at the end of the blocks. + if (TIB == TIE || FIB == FIE) + return true; + + // Count duplicate instructions at the ends of the blocks. + while (TIE != TIB && FIE != FIB) { + // Skip dbg_value instructions. These do not count. + if (TIE->isDebugValue()) { + while (TIE != TIB && TIE->isDebugValue()) + --TIE; + if (TIE == TIB) + break; + } + if (FIE->isDebugValue()) { + while (FIE != FIB && FIE->isDebugValue()) + --FIE; + if (FIE == FIB) + break; + } + if (!TIE->isIdenticalTo(*FIE)) + break; + // We need to make sure the conditional branch instructions are the same, + // but we shouldn't count the branch instructions, as they will be stripped + // out during if conversion. + if (!TIE->isBranch()) + ++Dups2; + --TIE; + --FIE; + } + + // Make sure that neither block has any remaining branches, and that at most + // one of them has remaining predicate clobbering instructions. + auto recalculateCostsAndClobbers = [&]( + MachineBasicBlock::iterator &BIB, + MachineBasicBlock::iterator &BIE, + BBInfo &BBIRecalc) { + while (BIB != BIE) { + // Skip dbg_value instructions. These do not count. + if (BIB->isDebugValue()) { + while (BIB != BIE && BIB->isDebugValue()) + ++BIB; + if (BIB == BIE) + break; + } + // A Cond-clobbering instruction can only occur at the end of the + // non-duplicated section. + if (BBIRecalc.ClobbersPred) + return false; + if (TII->isPredicated(*BIB)) + return false; + if (TII->DefinesPredicate(*BIB, PredDefs)) + BBIRecalc.ClobbersPred = true; + if (BIB->isBranch()) + return false; + if (!TII->isPredicable(*BIB)) + return false; + unsigned ExtraPredCost = TII->getPredicationCost(*BIB); + unsigned NumCycles = SchedModel.computeInstrLatency(&(*BIB), false); + if (NumCycles > 1) + BBIRecalc.ExtraCost += NumCycles-1; + BBIRecalc.ExtraCost2 += ExtraPredCost; + ++BIB; + } + return true; + }; + // TIE and FIE both point at the last instruction, move them back. + ++TIE; ++FIE; + if (!recalculateCostsAndClobbers(TIB, TIE, TrueBBICalc)) + return false; + if (!recalculateCostsAndClobbers(FIB, FIE, FalseBBICalc)) + return false; + if (TrueBBICalc.ClobbersPred && FalseBBICalc.ClobbersPred) + return false; + return true; +} + /// ValidDiamond - Returns true if the 'true' and 'false' blocks (along /// with their common predecessor) forms a valid diamond shape for ifcvt. bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, @@ -655,7 +875,10 @@ BBI.TrueBB = BBI.FalseBB = nullptr; BBI.BrCond.clear(); BBI.IsBrAnalyzable = - !TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); + !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); + SmallVector RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); + BBI.IsBrReversible = (RevCond.size() == 0) || + !TII->ReverseBranchCondition(RevCond); BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr; if (BBI.BrCond.size()) { @@ -795,6 +1018,31 @@ return true; } +/// FeasibilityAnalysisSharedTail - Determine if the block is a suitable +/// candidate to be predicated by the specified predicate, assuming that all +/// non predicable instructions are part of a shared tail. +bool IfConverter::FeasibilityAnalysisSharedTail( + BBInfo &BBI, SmallVectorImpl &Pred) { + // If the block is dead, then it cannot be predicated. Don't check + // IsUnpredicable, because while the whole block may not be, the portion that + // is unshared may well be predicable. + if (BBI.IsDone) + return false; + + // If it is already predicated but we couldn't analyze its terminator, the + // latter might fallthrough, but we can't determine where to. + // Conservatively avoid if-converting again. + if (BBI.Predicate.size() && !BBI.IsBrAnalyzable) + return false; + + // If it is already predicated, check if the new predicate subsumes + // its predicate. + if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate)) + return false; + + return true; +} + /// AnalyzeBlock - Analyze the structure of the sub-CFG starting from /// the specified block. Record its successors and whether it looks like an /// if-conversion candidate. @@ -838,6 +1086,7 @@ // Do not ifcvt if either path is a back edge to the entry block. if (BBI.TrueBB == BB || BBI.FalseBB == BB) { + DEBUG(dbgs() << "Not ifcvting because the back edge is the entry block.\n"); BBI.IsBeingAnalyzed = false; BBI.IsAnalyzed = true; BBStack.pop_back(); @@ -902,6 +1151,34 @@ Enqueued = true; } + BBInfo TrueBBICalc, FalseBBICalc; + if (CanRevCond && ValidDiamondTail(TrueBBI, FalseBBI, Dups, Dups2, + TrueBBICalc, FalseBBICalc) && + MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) + + TrueBBICalc.ExtraCost), + TrueBBICalc.ExtraCost2, + *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) + + FalseBBICalc.ExtraCost), + FalseBBICalc.ExtraCost2, + Prediction) && + FeasibilityAnalysisSharedTail(TrueBBI, BBI.BrCond) && + FeasibilityAnalysisSharedTail(FalseBBI, RevCond)) { + // DiamondTail: + // if TBB and FBB have a common tail that includes their conditional + // branch instructions, then we can If Convert this pattern. + // EBB + // _/ \_ + // | | + // TBB FBB + // / \ / \ + // FalseBB TrueBB FalseBB + // + Tokens.push_back(llvm::make_unique( + BBI, ICDiamondTail, TNeedSub | FNeedSub, Dups, Dups2, + (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred)); + Enqueued = true; + } + if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) && MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, TrueBBI.ExtraCost2, Prediction) && @@ -1339,6 +1616,214 @@ return true; } +/// IfConvertDiamondTail - If convert an almost-diamond sub-CFG where the true +/// and false blocks share a common tail. +bool IfConverter::IfConvertDiamondTail( + BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2, + bool TClobbersPred, bool FClobbersPred) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + + if (TrueBBI.IsDone || FalseBBI.IsDone) { + // Something has changed. It's no longer safe to predicate these blocks. + BBI.IsAnalyzed = false; + TrueBBI.IsAnalyzed = false; + FalseBBI.IsAnalyzed = false; + return false; + } + + if (TrueBBI.BB->hasAddressTaken() || FalseBBI.BB->hasAddressTaken()) + // Conservatively abort if-conversion if either BB has its address taken. + return false; + + // Put the predicated instructions from the 'true' block before the + // instructions from the 'false' block, unless the true block would clobber + // the predicate, in which case, do the opposite. + BBInfo *BBI1 = &TrueBBI; + BBInfo *BBI2 = &FalseBBI; + SmallVector RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); + if (TII->ReverseBranchCondition(RevCond)) + llvm_unreachable("Unable to reverse branch condition!"); + SmallVector *Cond1 = &BBI.BrCond; + SmallVector *Cond2 = &RevCond; + + // Figure out the more profitable ordering. + bool DoSwap = false; + if (TClobbersPred && !FClobbersPred) + DoSwap = true; + else if (TClobbersPred == FClobbersPred) { + if (TrueBBI.NonPredSize > FalseBBI.NonPredSize) + DoSwap = true; + } + if (DoSwap) { + std::swap(BBI1, BBI2); + std::swap(Cond1, Cond2); + } + + // Remove the conditional branch from entry to the blocks. + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + + // Initialize liveins to the first BB. These are potentially redefined by + // predicated instructions. + Redefs.init(TRI); + Redefs.addLiveIns(*BBI1->BB); + + // Remove the duplicated instructions at the beginnings of both paths. + // Skip dbg_value instructions + MachineBasicBlock::iterator DI1 = BBI1->BB->getFirstNonDebugInstr(); + MachineBasicBlock::iterator DI2 = BBI2->BB->getFirstNonDebugInstr(); + BBI1->NonPredSize -= NumDups1; + BBI2->NonPredSize -= NumDups1; + + // Skip past the dups on each side separately since there may be + // differing dbg_value entries. + for (unsigned i = 0; i < NumDups1; ++DI1) { + if (!DI1->isDebugValue()) + ++i; + } + while (NumDups1 != 0) { + ++DI2; + if (!DI2->isDebugValue()) + --NumDups1; + } + + // Compute a set of registers which must not be killed by instructions in BB1: + // This is everything used+live in BB2 after the duplicated instructions. We + // can compute this set by simulating liveness backwards from the end of BB2. + DontKill.init(TRI); + for (MachineBasicBlock::reverse_iterator I = BBI2->BB->rbegin(), + E = MachineBasicBlock::reverse_iterator(DI2); I != E; ++I) { + DontKill.stepBackward(*I); + } + + for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E; + ++I) { + SmallVector, 4> IgnoredClobbers; + Redefs.stepForward(*I, IgnoredClobbers); + } + BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); + BBI2->BB->erase(BBI2->BB->begin(), DI2); + + // Remove branch from the 'true' block. This is safe, because we have + // determined that both blocks have the same branch instructions. The branch + // will be added back at the end, unpredicated. + BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); + // Remove duplicated instructions. + DI1 = BBI1->BB->end(); + for (unsigned i = 0; i != NumDups2; ) { + // NumDups2 only counted non-dbg_value instructions, so this won't + // run off the head of the list. + assert (DI1 != BBI1->BB->begin()); + --DI1; + // skip dbg_value instructions + if (!DI1->isDebugValue()) + ++i; + } + BBI1->BB->erase(DI1, BBI1->BB->end()); + + // Kill flags in the true block for registers living into the false block + // must be removed. + RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI); + + // Remove 'false' block branch, and find the last instruction to predicate. + // Save the debug location. + DebugLoc dl; + MachineBasicBlock::iterator BBI2T = BBI2->BB->getFirstTerminator(); + if (BBI2T != BBI2->BB->end()) + dl = BBI2T->getDebugLoc(); + BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); + DI2 = BBI2->BB->end(); + while (NumDups2 != 0) { + // NumDups2 only counted non-dbg_value instructions, so this won't + // run off the head of the list. + assert (DI2 != BBI2->BB->begin()); + --DI2; + // skip dbg_value instructions + if (!DI2->isDebugValue()) + --NumDups2; + } + + // Remember which registers would later be defined by the false block. + // This allows us not to predicate instructions in the true block that would + // later be re-defined. That is, rather than + // subeq r0, r1, #1 + // addne r0, r1, #1 + // generate: + // sub r0, r1, #1 + // addne r0, r1, #1 + SmallSet RedefsByFalse; + SmallSet ExtUses; + if (TII->isProfitableToUnpredicate(*BBI1->BB, *BBI2->BB)) { + for (MachineBasicBlock::iterator FI = BBI2->BB->begin(); FI != DI2; ++FI) { + if (FI->isDebugValue()) + continue; + SmallVector Defs; + for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = FI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) { + Defs.push_back(Reg); + } else if (!RedefsByFalse.count(Reg)) { + // These are defined before ctrl flow reach the 'false' instructions. + // They cannot be modified by the 'true' instructions. + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + ExtUses.insert(*SubRegs); + } + } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Reg = Defs[i]; + if (!ExtUses.count(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + RedefsByFalse.insert(*SubRegs); + } + } + } + } + + // Predicate the 'true' block. + PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, &RedefsByFalse); + + // After predicating BBI1, if there is a predicated terminator in BBI1 and + // a non-predicated in BBI2, then we don't want to predicate the one from + // BBI2. The reason is that if we merged these blocks, we would end up with + // two predicated terminators in the same block. + if (!BBI2->BB->empty() && (DI2 == BBI2->BB->end())) { + MachineBasicBlock::iterator BBI1T = BBI1->BB->getFirstTerminator(); + MachineBasicBlock::iterator BBI2T = BBI2->BB->getFirstTerminator(); + if (BBI1T != BBI1->BB->end() && TII->isPredicated(*BBI1T) && + BBI2T != BBI2->BB->end() && !TII->isPredicated(*BBI2T)) + llvm_unreachable("Terminator should have been removed for Diamond-Tail case."); + } + + // Predicate the 'false' block. + PredicateBlock(*BBI2, DI2, *Cond2); + + // Merge the true block into the entry of the diamond. + MergeBlocks(BBI, *BBI1, /* AddEdges */ true); + MergeBlocks(BBI, *BBI2, /* AddEdges */ true); + + // Add back the branch. + // Debug location saved above when removing the branch from BBI2 + TII->InsertBranch(*BBI.BB, BBI2->TrueBB, BBI2->FalseBB, BBI2->BrCond, dl); + + RemoveExtraEdges(BBI); + + // Update block info. + BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; + InvalidatePreds(BBI.BB); + + // FIXME: Must maintain LiveIns. + return true; +} + /// IfConvertDiamond - If convert a diamond sub-CFG. /// bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, Index: test/CodeGen/Thumb2/thumb2-ifcvt1.ll =================================================================== --- test/CodeGen/Thumb2/thumb2-ifcvt1.ll +++ test/CodeGen/Thumb2/thumb2-ifcvt1.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-default-it | FileCheck %s -; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it |FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it -enable-tail-merge=0 | FileCheck %s define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK-LABEL: t1: ; CHECK: ittt ne @@ -25,9 +26,9 @@ define i32 @t2(i32 %a, i32 %b) nounwind { entry: ; CHECK-LABEL: t2: -; CHECK: ite gt -; CHECK: subgt -; CHECK: suble +; CHECK: ite {{gt|le}} +; CHECK-DAG: suble +; CHECK-DAG: subgt %tmp1434 = icmp eq i32 %a, %b ; [#uses=1] br i1 %tmp1434, label %bb17, label %bb.outer