Index: llvm/trunk/lib/CodeGen/IfConversion.cpp =================================================================== --- llvm/trunk/lib/CodeGen/IfConversion.cpp +++ llvm/trunk/lib/CodeGen/IfConversion.cpp @@ -242,7 +242,6 @@ void AnalyzeBlocks(MachineFunction &MF, std::vector<std::unique_ptr<IfcvtToken>> &Tokens); void InvalidatePreds(MachineBasicBlock &MBB); - void RemoveExtraEdges(BBInfo &BBI); bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind); bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI, @@ -1342,14 +1341,6 @@ TII->insertBranch(MBB, &ToMBB, nullptr, NoCond, dl); } -/// Remove true / false edges if either / both are no longer successors. -void IfConverter::RemoveExtraEdges(BBInfo &BBI) { - MachineBasicBlock *TBB = nullptr, *FBB = nullptr; - SmallVector<MachineOperand, 4> Cond; - if (!TII->analyzeBranch(*BBI.BB, TBB, FBB, Cond)) - BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); -} - /// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all /// values defined in MI which are also live/used by MI. static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) { @@ -1483,15 +1474,15 @@ // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond); - // RemoveExtraEdges won't work if the block has an unanalyzable branch, so - // explicitly remove CvtBBI as a successor. + // Keep the CFG updated. BBI.BB->removeSuccessor(&CvtMBB, true); } else { // Predicate the instructions in the true block. RemoveKills(CvtMBB.begin(), CvtMBB.end(), DontKill, *TRI); PredicateBlock(*CvtBBI, CvtMBB.end(), Cond); - // Merge converted block into entry block. + // Merge converted block into entry block. The BB to Cvt edge is removed + // by MergeBlocks. MergeBlocks(BBI, *CvtBBI); } @@ -1512,8 +1503,6 @@ IterIfcvt = false; } - RemoveExtraEdges(BBI); - // Update block info. BB can be iteratively if-converted. 
if (!IterIfcvt) BBI.IsDone = true; @@ -1599,10 +1588,6 @@ // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); - - // RemoveExtraEdges won't work if the block has an unanalyzable branch, so - // explicitly remove CvtBBI as a successor. - BBI.BB->removeSuccessor(&CvtMBB, true); } else { // Predicate the 'true' block after removing its branch. CvtBBI->NonPredSize -= TII->removeBranch(CvtMBB); @@ -1612,6 +1597,9 @@ MergeBlocks(BBI, *CvtBBI, false); } + // Keep the CFG updated. + BBI.BB->removeSuccessor(&CvtMBB, true); + // If 'true' block has a 'false' successor, add an exit branch to it. if (HasEarlyExit) { SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(), @@ -1659,8 +1647,6 @@ IterIfcvt = false; } - RemoveExtraEdges(BBI); - // Update block info. BB can be iteratively if-converted. if (!IterIfcvt) BBI.IsDone = true; @@ -1923,8 +1909,6 @@ TII->insertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB, TrueBBI.BrCond, dl); - RemoveExtraEdges(BBI); - // Update block info. BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; InvalidatePreds(*BBI.BB); @@ -1961,6 +1945,11 @@ // fold the tail block in as well. Otherwise, unless it falls through to the // tail, add a unconditional branch to it. if (TailBB) { + // We need to remove the edges to the true and false blocks manually since + // we didn't let IfConvertDiamondCommon update the CFG. + BBI.BB->removeSuccessor(TrueBBI.BB); + BBI.BB->removeSuccessor(FalseBBI.BB, true); + BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()]; bool CanMergeTail = !TailBBI.HasFallThrough && !TailBBI.BB->hasAddressTaken(); @@ -1990,13 +1979,6 @@ } } - // RemoveExtraEdges won't work if the block has an unanalyzable branch, - // which can happen here if TailBB is unanalyzable and is merged, so - // explicitly remove BBI1 and BBI2 as successors. 
- BBI.BB->removeSuccessor(TrueBBI.BB); - BBI.BB->removeSuccessor(FalseBBI.BB, /* NormalizeSuccessProbs */ true); - RemoveExtraEdges(BBI); - // Update block info. BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; InvalidatePreds(*BBI.BB); @@ -2133,7 +2115,8 @@ /// Move all instructions from FromBB to the end of ToBB. This will leave /// FromBB as an empty block, so remove all of its successor edges except for /// the fall-through edge. If AddEdges is true, i.e., when FromBBI's branch is -/// being moved, add those successor edges to ToBBI. +/// being moved, add those successor edges to ToBBI and remove the old edge +/// from ToBBI to FromBBI. void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { MachineBasicBlock &FromMBB = *FromBBI.BB; assert(!FromMBB.hasAddressTaken() && @@ -2165,12 +2148,10 @@ // AddEdges is true and FromMBB is a successor of ToBBI.BB. auto To2FromProb = BranchProbability::getZero(); if (AddEdges && ToBBI.BB->isSuccessor(&FromMBB)) { + // Remove the old edge but remember the edge probability so we can calculate + // the correct weights on the new edges being added further down. To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, &FromMBB); - // Set the edge probability from ToBBI.BB to FromMBB to zero to avoid the - // edge probability being merged to other edges when this edge is removed - // later. - ToBBI.BB->setSuccProbability(find(ToBBI.BB->successors(), &FromMBB), - BranchProbability::getZero()); + ToBBI.BB->removeSuccessor(&FromMBB); } for (MachineBasicBlock *Succ : FromSuccs) { @@ -2229,9 +2210,11 @@ } } - // Now FromBBI always falls through to the next block! - if (NBB && !FromMBB.isSuccessor(NBB)) - FromMBB.addSuccessor(NBB); + // Move the now empty FromMBB out of the way to the end of the function so + // it doesn't interfere with fallthrough checks done by canFallThroughTo(). 
+ MachineBasicBlock *Last = &*FromMBB.getParent()->rbegin(); + if (Last != &FromMBB) + FromMBB.moveAfter(Last); // Normalize the probabilities of ToBBI.BB's successors with all adjustment // we've done above. Index: llvm/trunk/test/CodeGen/ARM/ifcvt-branch-weight.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/ifcvt-branch-weight.ll +++ llvm/trunk/test/CodeGen/ARM/ifcvt-branch-weight.ll @@ -19,7 +19,7 @@ br i1 %9, label %return, label %bb2 ; CHECK: BB#2: derived from LLVM BB %bb2 -; CHECK: Successors according to CFG: BB#3({{[0-9a-fx/= ]+}}50.00%) BB#4({{[0-9a-fx/= ]+}}50.00%) +; CHECK: Successors according to CFG: BB#4({{[0-9a-fx/= ]+}}50.00%) BB#3({{[0-9a-fx/= ]+}}50.00%) bb2: %v10 = icmp eq i32 %3, 16 Index: llvm/trunk/test/CodeGen/ARM/indirectbr-3.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/indirectbr-3.ll +++ llvm/trunk/test/CodeGen/ARM/indirectbr-3.ll @@ -10,9 +10,11 @@ define i32 @preserve_blocks(i32 %x) { ; preserve_blocks: ; CHECK: Block address taken -; CHECK: movs r0, #1 -; CHECK: Block address taken +; CHECK: %ibt1 ; CHECK: movs r0, #2 +; CHECK: Block address taken +; CHECK: %ibt2 +; CHECK: movs r0, #1 ; CHECK-NOT: Address of block that was removed by CodeGen ; Separate bug. There are no valid diamonds to if-convert in this file. Index: llvm/trunk/test/CodeGen/ARM/struct-byval-frame-index.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/struct-byval-frame-index.ll +++ llvm/trunk/test/CodeGen/ARM/struct-byval-frame-index.ll @@ -4,11 +4,21 @@ ; generated. ; PR16393 +; We expect the spill to be generated in %if.end230 and the reloads in +; %if.end249 and %for.body285. + ; CHECK: set_stored_macroblock_parameters +; CHECK: @ %if.end230 +; CHECK-NOT:@ %if. +; CHECK-NOT:@ %for. 
; CHECK: str r{{.*}}, [sp, [[SLOT:#[0-9]+]]] @ 4-byte Spill -; CHECK: bl RestoreMVBlock8x8 -; CHECK: bl RestoreMVBlock8x8 -; CHECK: bl RestoreMVBlock8x8 +; CHECK: @ %if.end249 +; CHECK-NOT:@ %if. +; CHECK-NOT:@ %for. +; CHECK: ldr r{{.*}}, [sp, [[SLOT]]] @ 4-byte Reload +; CHECK: @ %for.body285 +; CHECK-NOT:@ %if. +; CHECK-NOT:@ %for. ; CHECK: ldr r{{.*}}, [sp, [[SLOT]]] @ 4-byte Reload target triple = "armv7l-unknown-linux-gnueabihf" Index: llvm/trunk/test/CodeGen/PowerPC/logic-ops-on-compares.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/logic-ops-on-compares.ll +++ llvm/trunk/test/CodeGen/PowerPC/logic-ops-on-compares.ll @@ -47,7 +47,6 @@ ; CHECK-NEXT: rldicl. r3, r3, 0, 63 ; CHECK-NEXT: bclr 12, eq, 0 ; CHECK-NEXT: # BB#1: # %if.end29.thread136 -; CHECK-NEXT: .LBB1_2: # %if.end29 entry: %0 = load i32, i32* %ptr, align 4 %rem17127 = and i32 %0, 1 @@ -106,7 +105,6 @@ ; CHECK-NEXT: rldicl. r3, r3, 0, 63 ; CHECK-NEXT: bclr 12, eq, 0 ; CHECK-NEXT: # BB#1: # %if.end29.thread136 -; CHECK-NEXT: .LBB3_2: # %if.end29 entry: %0 = load i64, i64* %ptr, align 4 %rem17127 = and i64 %0, 1 @@ -167,7 +165,6 @@ ; CHECK-NEXT: andi. r3, r3, 1 ; CHECK-NEXT: bclr 12, gt, 0 ; CHECK-NEXT: # BB#1: # %if.end29.thread136 -; CHECK-NEXT: .LBB5_2: # %if.end29 entry: %0 = load i64, i64* %ptr, align 4 %rem17127 = and i64 %0, 1 Index: llvm/trunk/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll +++ llvm/trunk/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll @@ -591,7 +591,9 @@ ; Another infinite loop test this time with two nested infinite loop. 
; CHECK-LABEL: infiniteloop3 -; CHECK: # %end +; CHECK: Lfunc_begin[[FUNCNUM:[0-9]+]] +; CHECK: bclr +; CHECK: Lfunc_end[[FUNCNUM]] define void @infiniteloop3() { entry: br i1 undef, label %loop2a, label %body Index: llvm/trunk/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll =================================================================== --- llvm/trunk/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll +++ llvm/trunk/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll @@ -7,13 +7,15 @@ define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { ; CHECK-LABEL: f1: ; CHECK: l %r2, 0(%r3) -; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: lr [[NEW:%r[0-9]+]], %r2 +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: cs %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 +; CHECK: [[LOOP]]: +; CHECK: lr [[NEW]], %r2 ; CHECK: crjle %r2, %r4, [[KEEP:\..*]] ; CHECK: lr [[NEW]], %r4 -; CHECK: cs %r2, [[NEW]], 0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw min i32 *%src, i32 %b seq_cst ret i32 %res } @@ -22,13 +24,15 @@ define i32 @f2(i32 %dummy, i32 *%src, i32 %b) { ; CHECK-LABEL: f2: ; CHECK: l %r2, 0(%r3) -; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: lr [[NEW:%r[0-9]+]], %r2 +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: cs %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 +; CHECK: [[LOOP]]: +; CHECK: lr [[NEW]], %r2 ; CHECK: crjhe %r2, %r4, [[KEEP:\..*]] ; CHECK: lr [[NEW]], %r4 -; CHECK: cs %r2, [[NEW]], 0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw max i32 *%src, i32 %b seq_cst ret i32 %res } @@ -37,13 +41,15 @@ define i32 @f3(i32 %dummy, i32 *%src, i32 %b) { ; CHECK-LABEL: f3: ; CHECK: l %r2, 0(%r3) -; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: lr [[NEW:%r[0-9]+]], %r2 +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: cs %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 +; CHECK: [[LOOP]]: +; CHECK: lr [[NEW]], %r2 ; CHECK: clrjle %r2, %r4, [[KEEP:\..*]] ; CHECK: lr [[NEW]], %r4 -; CHECK: cs %r2, [[NEW]], 
0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw umin i32 *%src, i32 %b seq_cst ret i32 %res } @@ -52,13 +58,15 @@ define i32 @f4(i32 %dummy, i32 *%src, i32 %b) { ; CHECK-LABEL: f4: ; CHECK: l %r2, 0(%r3) -; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: lr [[NEW:%r[0-9]+]], %r2 +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: cs %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 +; CHECK: [[LOOP]]: +; CHECK: lr [[NEW]], %r2 ; CHECK: clrjhe %r2, %r4, [[KEEP:\..*]] ; CHECK: lr [[NEW]], %r4 -; CHECK: cs %r2, [[NEW]], 0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw umax i32 *%src, i32 %b seq_cst ret i32 %res } @@ -159,14 +167,15 @@ define i32 @f13(i32 %dummy, i32 *%ptr) { ; CHECK-LABEL: f13: ; CHECK: lhi [[LIMIT:%r[0-9]+]], 42 -; CHECK: l %r2, 0(%r3) -; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: lr [[NEW:%r[0-9]+]], %r2 +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: cs %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 +; CHECK: [[LOOP]]: +; CHECK: lr [[NEW]], %r2 ; CHECK: crjle %r2, [[LIMIT]], [[KEEP:\..*]] ; CHECK: lhi [[NEW]], 42 -; CHECK: cs %r2, [[NEW]], 0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw min i32 *%ptr, i32 42 seq_cst ret i32 %res } Index: llvm/trunk/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll =================================================================== --- llvm/trunk/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll +++ llvm/trunk/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll @@ -7,13 +7,15 @@ define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { ; CHECK-LABEL: f1: ; CHECK: lg %r2, 0(%r3) +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: csg %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: lgr [[NEW:%r[0-9]+]], %r2 ; CHECK: cgrjle %r2, %r4, [[KEEP:\..*]] ; CHECK: lgr [[NEW]], %r4 -; CHECK: csg %r2, [[NEW]], 0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = 
atomicrmw min i64 *%src, i64 %b seq_cst ret i64 %res } @@ -22,13 +24,15 @@ define i64 @f2(i64 %dummy, i64 *%src, i64 %b) { ; CHECK-LABEL: f2: ; CHECK: lg %r2, 0(%r3) +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: csg %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: lgr [[NEW:%r[0-9]+]], %r2 ; CHECK: cgrjhe %r2, %r4, [[KEEP:\..*]] ; CHECK: lgr [[NEW]], %r4 -; CHECK: csg %r2, [[NEW]], 0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw max i64 *%src, i64 %b seq_cst ret i64 %res } @@ -37,13 +41,15 @@ define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { ; CHECK-LABEL: f3: ; CHECK: lg %r2, 0(%r3) +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: csg %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: lgr [[NEW:%r[0-9]+]], %r2 ; CHECK: clgrjle %r2, %r4, [[KEEP:\..*]] ; CHECK: lgr [[NEW]], %r4 -; CHECK: csg %r2, [[NEW]], 0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw umin i64 *%src, i64 %b seq_cst ret i64 %res } @@ -52,13 +58,15 @@ define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { ; CHECK-LABEL: f4: ; CHECK: lg %r2, 0(%r3) +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: csg %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: lgr [[NEW:%r[0-9]+]], %r2 ; CHECK: clgrjhe %r2, %r4, [[KEEP:\..*]] ; CHECK: lgr [[NEW]], %r4 -; CHECK: csg %r2, [[NEW]], 0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw umax i64 *%src, i64 %b seq_cst ret i64 %res } @@ -127,13 +135,15 @@ ; CHECK-LABEL: f10: ; CHECK: lghi [[LIMIT:%r[0-9]+]], 42 ; CHECK: lg %r2, 0(%r3) +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: csg %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: lgr [[NEW:%r[0-9]+]], %r2 ; CHECK: cgrjle %r2, [[LIMIT]], [[KEEP:\..*]] ; CHECK: lghi [[NEW]], 42 -; CHECK: csg %r2, [[NEW]], 0(%r3) -; 
CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw min i64 *%ptr, i64 42 seq_cst ret i64 %res }