Index: lib/CodeGen/IfConversion.cpp =================================================================== --- lib/CodeGen/IfConversion.cpp +++ lib/CodeGen/IfConversion.cpp @@ -242,7 +242,6 @@ void AnalyzeBlocks(MachineFunction &MF, std::vector> &Tokens); void InvalidatePreds(MachineBasicBlock &MBB); - void RemoveExtraEdges(BBInfo &BBI); bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind); bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI, @@ -1342,14 +1341,6 @@ TII->insertBranch(MBB, &ToMBB, nullptr, NoCond, dl); } -/// Remove true / false edges if either / both are no longer successors. -void IfConverter::RemoveExtraEdges(BBInfo &BBI) { - MachineBasicBlock *TBB = nullptr, *FBB = nullptr; - SmallVector Cond; - if (!TII->analyzeBranch(*BBI.BB, TBB, FBB, Cond)) - BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); -} - /// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all /// values defined in MI which are also live/used by MI. static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) { @@ -1474,21 +1465,24 @@ DontKill.addLiveIns(NextMBB); } + // Remove the branches from the entry so we can add the contents of the true + // block to it. + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); + if (CvtMBB.pred_size() > 1) { - BBI.NonPredSize -= TII->removeBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond); - // RemoveExtraEdges won't work if the block has an unanalyzable branch, so - // explicitly remove CvtBBI as a successor. + // Keep the CFG updated. BBI.BB->removeSuccessor(&CvtMBB, true); } else { + // Predicate the instructions in the true block. RemoveKills(CvtMBB.begin(), CvtMBB.end(), DontKill, *TRI); PredicateBlock(*CvtBBI, CvtMBB.end(), Cond); - // Merge converted block into entry block. - BBI.NonPredSize -= TII->removeBranch(*BBI.BB); + // Merge converted block into entry block. The BB to Cvt edge is removed + // by MergeBlocks. MergeBlocks(BBI, *CvtBBI); } @@ -1509,8 +1503,6 @@ IterIfcvt = false; } - RemoveExtraEdges(BBI); - // Update block info. BB can be iteratively if-converted. if (!IterIfcvt) BBI.IsDone = true; @@ -1588,25 +1580,26 @@ BBCvt = MBPI->getEdgeProbability(BBI.BB, &CvtMBB); } + // Remove the branches from the entry so we can add the contents of the true + // block to it. + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); + if (CvtMBB.pred_size() > 1) { - BBI.NonPredSize -= TII->removeBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); - - // RemoveExtraEdges won't work if the block has an unanalyzable branch, so - // explicitly remove CvtBBI as a successor. - BBI.BB->removeSuccessor(&CvtMBB, true); } else { // Predicate the 'true' block after removing its branch. CvtBBI->NonPredSize -= TII->removeBranch(CvtMBB); PredicateBlock(*CvtBBI, CvtMBB.end(), Cond); // Now merge the entry of the triangle with the true block. - BBI.NonPredSize -= TII->removeBranch(*BBI.BB); MergeBlocks(BBI, *CvtBBI, false); } + // Keep the CFG updated. + BBI.BB->removeSuccessor(&CvtMBB, true); + // If 'true' block has a 'false' successor, add an exit branch to it. if (HasEarlyExit) { SmallVector RevCond(CvtBBI->BrCond.begin(), @@ -1654,8 +1647,6 @@ IterIfcvt = false; } - RemoveExtraEdges(BBI); - // Update block info. BB can be iteratively if-converted. if (!IterIfcvt) BBI.IsDone = true; @@ -1918,8 +1909,6 @@ TII->insertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB, TrueBBI.BrCond, dl); - RemoveExtraEdges(BBI); - // Update block info. BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; InvalidatePreds(*BBI.BB); @@ -1956,6 +1945,11 @@ // fold the tail block in as well. Otherwise, unless it falls through to the // tail, add a unconditional branch to it. if (TailBB) { + // We need to remove the edges to the true and false blocks manually since + // we didn't let IfConvertDiamondCommon update the CFG. + BBI.BB->removeSuccessor(TrueBBI.BB); + BBI.BB->removeSuccessor(FalseBBI.BB, true); + BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()]; bool CanMergeTail = !TailBBI.HasFallThrough && !TailBBI.BB->hasAddressTaken(); @@ -1985,13 +1979,6 @@ } } - // RemoveExtraEdges won't work if the block has an unanalyzable branch, - // which can happen here if TailBB is unanalyzable and is merged, so - // explicitly remove BBI1 and BBI2 as successors. - BBI.BB->removeSuccessor(TrueBBI.BB); - BBI.BB->removeSuccessor(FalseBBI.BB, /* NormalizeSuccessProbs */ true); - RemoveExtraEdges(BBI); - // Update block info. BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; InvalidatePreds(*BBI.BB); @@ -2128,7 +2115,8 @@ /// Move all instructions from FromBB to the end of ToBB. This will leave /// FromBB as an empty block, so remove all of its successor edges except for /// the fall-through edge. If AddEdges is true, i.e., when FromBBI's branch is -/// being moved, add those successor edges to ToBBI. +/// being moved, add those successor edges to ToBBI and remove the old edge +/// from ToBBI to FromBBI. void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { MachineBasicBlock &FromMBB = *FromBBI.BB; assert(!FromMBB.hasAddressTaken() && @@ -2160,12 +2148,10 @@ // AddEdges is true and FromMBB is a successor of ToBBI.BB. auto To2FromProb = BranchProbability::getZero(); if (AddEdges && ToBBI.BB->isSuccessor(&FromMBB)) { + // Remove the old edge but remember the edge probability so we can calculate + // the correct weights on the new edges being added further down. To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, &FromMBB); - // Set the edge probability from ToBBI.BB to FromMBB to zero to avoid the - // edge probability being merged to other edges when this edge is removed - // later. - ToBBI.BB->setSuccProbability(find(ToBBI.BB->successors(), &FromMBB), - BranchProbability::getZero()); + ToBBI.BB->removeSuccessor(&FromMBB); } for (MachineBasicBlock *Succ : FromSuccs) { @@ -2224,9 +2210,11 @@ } } - // Now FromBBI always falls through to the next block! - if (NBB && !FromMBB.isSuccessor(NBB)) - FromMBB.addSuccessor(NBB); + // Move the now empty FromMBB out of the way to the end of the function so + // it doesn't interfere with fallthrough checks done by canFallThroughTo(). + MachineBasicBlock *Last = &*FromMBB.getParent()->rbegin(); + if (Last != &FromMBB) + FromMBB.moveAfter(Last); // Normalize the probabilities of ToBBI.BB's successors with all adjustment // we've done above. Index: test/CodeGen/ARM/ifcvt-branch-weight.ll =================================================================== --- test/CodeGen/ARM/ifcvt-branch-weight.ll +++ test/CodeGen/ARM/ifcvt-branch-weight.ll @@ -19,7 +19,7 @@ br i1 %9, label %return, label %bb2 ; CHECK: BB#2: derived from LLVM BB %bb2 -; CHECK: Successors according to CFG: BB#3({{[0-9a-fx/= ]+}}50.00%) BB#4({{[0-9a-fx/= ]+}}50.00%) +; CHECK: Successors according to CFG: BB#4({{[0-9a-fx/= ]+}}50.00%) BB#3({{[0-9a-fx/= ]+}}50.00%) bb2: %v10 = icmp eq i32 %3, 16 Index: test/CodeGen/ARM/indirectbr-3.ll =================================================================== --- test/CodeGen/ARM/indirectbr-3.ll +++ test/CodeGen/ARM/indirectbr-3.ll @@ -10,9 +10,11 @@ define i32 @preserve_blocks(i32 %x) { ; preserve_blocks: ; CHECK: Block address taken -; CHECK: movs r0, #1 -; CHECK: Block address taken +; CHECK: %ibt1 ; CHECK: movs r0, #2 +; CHECK: Block address taken +; CHECK: %ibt2 +; CHECK: movs r0, #1 ; CHECK-NOT: Address of block that was removed by CodeGen ; Separate bug. There are no valid diamonds to if-convert in this file. Index: test/CodeGen/ARM/struct-byval-frame-index.ll =================================================================== --- test/CodeGen/ARM/struct-byval-frame-index.ll +++ test/CodeGen/ARM/struct-byval-frame-index.ll @@ -4,11 +4,21 @@ ; generated. ; PR16393 +; We expect the spill to be generated in %if.end230 and the reloads in +; %if.end249 and %for.body285. + ; CHECK: set_stored_macroblock_parameters +; CHECK: @ %if.end230 +; CHECK-NOT:@ %if. +; CHECK-NOT:@ %for. ; CHECK: str r{{.*}}, [sp, [[SLOT:#[0-9]+]]] @ 4-byte Spill -; CHECK: bl RestoreMVBlock8x8 -; CHECK: bl RestoreMVBlock8x8 -; CHECK: bl RestoreMVBlock8x8 +; CHECK: @ %if.end249 +; CHECK-NOT:@ %if. +; CHECK-NOT:@ %for. +; CHECK: ldr r{{.*}}, [sp, [[SLOT]]] @ 4-byte Reload +; CHECK: @ %for.body285 +; CHECK-NOT:@ %if. +; CHECK-NOT:@ %for. ; CHECK: ldr r{{.*}}, [sp, [[SLOT]]] @ 4-byte Reload target triple = "armv7l-unknown-linux-gnueabihf" Index: test/CodeGen/MIR/ARM/PR32721_ifcvt_triangle_unanalyzable.mir =================================================================== --- /dev/null +++ test/CodeGen/MIR/ARM/PR32721_ifcvt_triangle_unanalyzable.mir @@ -0,0 +1,23 @@ +# RUN: llc -mtriple=arm-apple-ios -run-pass=if-converter %s -o - | FileCheck %s +--- +name: foo +body: | + bb.0: + B %bb.2 + + bb.1: + BX_RET 14, 0 + + bb.2: + Bcc %bb.1, 1, %cpsr + + bb.3: + B %bb.1 +... + +# We should get a single block containing the BX_RET, with no successors at all + +# CHECK: body: +# CHECK-NEXT: bb.0: +# CHECK-NEXT: BX_RET + Index: test/CodeGen/MIR/ARM/ifcvt_diamond_unanalyzable.mir =================================================================== --- /dev/null +++ test/CodeGen/MIR/ARM/ifcvt_diamond_unanalyzable.mir @@ -0,0 +1,30 @@ +# RUN: llc -mtriple=arm-apple-ios -run-pass=if-converter %s -o - | FileCheck %s +--- +name: foo +body: | + bb.0: + Bcc %bb.2, 1, %cpsr + + bb.1: + %sp = tADDspi %sp, 1, 14, _ + B %bb.3 + + bb.2: + %sp = tADDspi %sp, 2, 14, _ + B %bb.3 + + bb.3: + successors: + %sp = tADDspi %sp, 3, 14, _ + BX_RET 14, _ +... + +# Diamond testcase with unanalyzable instruction in the BB following the +# diamond. + +# CHECK: body: | +# CHECK: bb.0: +# CHECK: %sp = tADDspi %sp, 2, 1, %cpsr +# CHECK: %sp = tADDspi %sp, 1, 0, %cpsr, implicit %sp +# CHECK: %sp = tADDspi %sp, 3, 14, _ +# CHECK: BX_RET 14, _ Index: test/CodeGen/MIR/ARM/ifcvt_forked_diamond_unanalyzable.mir =================================================================== --- /dev/null +++ test/CodeGen/MIR/ARM/ifcvt_forked_diamond_unanalyzable.mir @@ -0,0 +1,48 @@ +# RUN: llc -mtriple=arm-apple-ios -run-pass=if-converter %s -o - | FileCheck %s +--- +name: foo +body: | + bb.0: + Bcc %bb.2, 1, %cpsr + + bb.1: + successors: %bb.3(0x20000000), %bb.4(0x60000000) + %sp = tADDspi %sp, 1, 14, _ + Bcc %bb.3, 1, %cpsr + B %bb.4 + + bb.2: + successors: %bb.3(0x20000000), %bb.4(0x60000000) + %sp = tADDspi %sp, 2, 14, _ + Bcc %bb.3, 1, %cpsr + B %bb.4 + + bb.3: + successors: + %sp = tADDspi %sp, 3, 14, _ + BX_RET 14, _ + + bb.4: + successors: + %sp = tADDspi %sp, 4, 14, _ + BX_RET 14, _ +... + +# Forked-diamond testcase with unanalyzable instructions in both the True and +# False BBs following the forked diamond. + +# CHECK: body: | +# CHECK: bb.0: +# CHECK: successors: %bb.2(0x20000000), %bb.1(0x60000000) + +# CHECK: %sp = tADDspi %sp, 2, 1, %cpsr +# CHECK: %sp = tADDspi %sp, 1, 0, %cpsr, implicit %sp +# CHECK: Bcc %bb.2, 1, %cpsr + +# CHECK: bb.1: +# CHECK: %sp = tADDspi %sp, 4, 14, _ +# CHECK: BX_RET 14, _ + +# CHECK: bb.2: +# CHECK: %sp = tADDspi %sp, 3, 14, _ +# CHECK: BX_RET 14, _ Index: test/CodeGen/MIR/ARM/ifcvt_simple_bad_zero_prob_succ.mir =================================================================== --- /dev/null +++ test/CodeGen/MIR/ARM/ifcvt_simple_bad_zero_prob_succ.mir @@ -0,0 +1,33 @@ +# RUN: llc -mtriple=arm-apple-ios -run-pass=if-converter %s -o - | FileCheck %s +--- +name: f1 +body: | + bb.0: + + bb.1: + Bcc %bb.3, 0, %cpsr + + bb.2: + + bb.3: + Bcc %bb.1, 0, %cpsr + + bb.4: + successors: %bb.1 + tBRIND %r1, 14, _ +... + +# We should only get bb.1 as successor to bb.1. No zero percent probability +# edge from bb.1 to bb.2. There shouldn't even be a bb.2 at all. + +# CHECK: body: | +# CHECK: bb.0: +# CHECK: successors: %bb.1(0x80000000) + +# CHECK: bb.1: +# CHECK: successors: %bb.1(0x80000000) +# CHECK-NOT: %bb.2(0x00000000) +# CHECK: tBRIND %r1, 1, %cpsr +# CHECK: B %bb.1 + +#CHECK-NOT: bb.2: Index: test/CodeGen/MIR/ARM/ifcvt_simple_unanalyzable.mir =================================================================== --- /dev/null +++ test/CodeGen/MIR/ARM/ifcvt_simple_unanalyzable.mir @@ -0,0 +1,25 @@ +# RUN: llc -mtriple=arm-apple-ios -run-pass=if-converter %s -o - | FileCheck %s +--- +name: foo +body: | + bb.0: + Bcc %bb.2, 0, %cpsr + + bb.1: + successors: + BX_RET 14, _ + + bb.2: + successors: + %sp = tADDspi %sp, 2, 14, _ + BX_RET 14, _ +... + +# Simple testcase with unanalyzable instructions in both TBB and FBB. + +# CHECK: body: | +# CHECK: bb.0: +# CHECK: %sp = tADDspi %sp, 2, 0, %cpsr +# CHECK: BX_RET 0, %cpsr +# CHECK: BX_RET 14, _ + Index: test/CodeGen/MIR/ARM/ifcvt_triangleWoCvtToNextEdge.mir =================================================================== --- /dev/null +++ test/CodeGen/MIR/ARM/ifcvt_triangleWoCvtToNextEdge.mir @@ -0,0 +1,52 @@ +# RUN: llc -mtriple=arm-apple-ios -run-pass=if-converter %s -o - | FileCheck %s +--- | + declare void @__stack_chk_fail() + declare void @bar() + + define void @foo() { + ret void + } +... +--- +name: foo +body: | + + bb.0: + Bcc %bb.1, 1, %cpsr + B %bb.2 + + bb.1: + Bcc %bb.3, 0, %cpsr + + bb.2: + successors: + tBL 14, %cpsr, @__stack_chk_fail + + bb.3: + successors: + %sp = tADDspi %sp, 2, 14, _ + %sp = tADDspi %sp, 2, 14, _ + tTAILJMPdND @bar, 14, %cpsr +... + +# bb.2 has no successors, presumably because __stack_chk_fail doesn't return, +# so there should be no edge from bb.2 to bb.3. +# Nevertheless, IfConversion treats bb.1, bb.2, bb.3 as a triangle and +# inserts a predicated copy of bb.2 in bb.1. + +# This caused r302876 to die with a failed assertion. + +# CHECK: bb.0: +# CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) +# CHECK: Bcc %bb.2, 1, %cpsr + +# CHECK: bb.1: +# CHECK-NOT: successors: +# CHECK: tBL 14, %cpsr, @__stack_chk_fail + +# CHECK: bb.2: +# CHECK-NOT: successors: +# CHECK: tBL 1, %cpsr, @__stack_chk_fail +# CHECK: %sp = tADDspi %sp, 2, 14, _ +# CHECK: %sp = tADDspi %sp, 2, 14, _ +# CHECK: tTAILJMPdND @bar, 14, %cpsr Index: test/CodeGen/PowerPC/logic-ops-on-compares.ll =================================================================== --- test/CodeGen/PowerPC/logic-ops-on-compares.ll +++ test/CodeGen/PowerPC/logic-ops-on-compares.ll @@ -47,7 +47,6 @@ ; CHECK-NEXT: rldicl. r3, r3, 0, 63 ; CHECK-NEXT: bclr 12, 2, 0 ; CHECK-NEXT: # BB#1: # %if.end29.thread136 -; CHECK-NEXT: .LBB1_2: # %if.end29 entry: %0 = load i32, i32* %ptr, align 4 %rem17127 = and i32 %0, 1 @@ -106,7 +105,6 @@ ; CHECK-NEXT: rldicl. r3, r3, 0, 63 ; CHECK-NEXT: bclr 12, 2, 0 ; CHECK-NEXT: # BB#1: # %if.end29.thread136 -; CHECK-NEXT: .LBB3_2: # %if.end29 entry: %0 = load i64, i64* %ptr, align 4 %rem17127 = and i64 %0, 1 @@ -167,7 +165,6 @@ ; CHECK-NEXT: andi. r3, r3, 1 ; CHECK-NEXT: bclr 12, 1, 0 ; CHECK-NEXT: # BB#1: # %if.end29.thread136 -; CHECK-NEXT: .LBB5_2: # %if.end29 entry: %0 = load i64, i64* %ptr, align 4 %rem17127 = and i64 %0, 1 Index: test/CodeGen/PowerPC/ppc-shrink-wrapping.ll =================================================================== --- test/CodeGen/PowerPC/ppc-shrink-wrapping.ll +++ test/CodeGen/PowerPC/ppc-shrink-wrapping.ll @@ -591,7 +591,9 @@ ; Another infinite loop test this time with two nested infinite loop. ; CHECK-LABEL: infiniteloop3 -; CHECK: # %end +; CHECK: Lfunc_begin[[FUNCNUM:[0-9]+]] +; CHECK: bclr +; CHECK: Lfunc_end[[FUNCNUM]] define void @infiniteloop3() { entry: br i1 undef, label %loop2a, label %body Index: test/CodeGen/SystemZ/atomicrmw-minmax-03.ll =================================================================== --- test/CodeGen/SystemZ/atomicrmw-minmax-03.ll +++ test/CodeGen/SystemZ/atomicrmw-minmax-03.ll @@ -7,13 +7,15 @@ define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { ; CHECK-LABEL: f1: ; CHECK: l %r2, 0(%r3) -; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: lr [[NEW:%r[0-9]+]], %r2 +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: cs %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 +; CHECK: [[LOOP]]: +; CHECK: lr [[NEW]], %r2 ; CHECK: crjle %r2, %r4, [[KEEP:\..*]] ; CHECK: lr [[NEW]], %r4 -; CHECK: cs %r2, [[NEW]], 0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw min i32 *%src, i32 %b seq_cst ret i32 %res } @@ -22,13 +24,15 @@ define i32 @f2(i32 %dummy, i32 *%src, i32 %b) { ; CHECK-LABEL: f2: ; CHECK: l %r2, 0(%r3) -; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: lr [[NEW:%r[0-9]+]], %r2 +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: cs %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 +; CHECK: [[LOOP]]: +; CHECK: lr [[NEW]], %r2 ; CHECK: crjhe %r2, %r4, [[KEEP:\..*]] ; CHECK: lr [[NEW]], %r4 -; CHECK: cs %r2, [[NEW]], 0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw max i32 *%src, i32 %b seq_cst ret i32 %res } @@ -37,13 +41,15 @@ define i32 @f3(i32 %dummy, i32 *%src, i32 %b) { ; CHECK-LABEL: f3: ; CHECK: l %r2, 0(%r3) -; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: lr [[NEW:%r[0-9]+]], %r2 +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: cs %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 +; CHECK: [[LOOP]]: +; CHECK: lr [[NEW]], %r2 ; CHECK: clrjle %r2, %r4, [[KEEP:\..*]] ; CHECK: lr [[NEW]], %r4 -; CHECK: cs %r2, [[NEW]], 0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw umin i32 *%src, i32 %b seq_cst ret i32 %res } @@ -52,13 +58,15 @@ define i32 @f4(i32 %dummy, i32 *%src, i32 %b) { ; CHECK-LABEL: f4: ; CHECK: l %r2, 0(%r3) -; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: lr [[NEW:%r[0-9]+]], %r2 +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: cs %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 +; CHECK: [[LOOP]]: +; CHECK: lr [[NEW]], %r2 ; CHECK: clrjhe %r2, %r4, [[KEEP:\..*]] ; CHECK: lr [[NEW]], %r4 -; CHECK: cs %r2, [[NEW]], 0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw umax i32 *%src, i32 %b seq_cst ret i32 %res } @@ -159,14 +167,15 @@ define i32 @f13(i32 %dummy, i32 *%ptr) { ; CHECK-LABEL: f13: ; CHECK: lhi [[LIMIT:%r[0-9]+]], 42 -; CHECK: l %r2, 0(%r3) -; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: lr [[NEW:%r[0-9]+]], %r2 +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: cs %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 +; CHECK: [[LOOP]]: +; CHECK: lr [[NEW]], %r2 ; CHECK: crjle %r2, [[LIMIT]], [[KEEP:\..*]] ; CHECK: lhi [[NEW]], 42 -; CHECK: cs %r2, [[NEW]], 0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw min i32 *%ptr, i32 42 seq_cst ret i32 %res } Index: test/CodeGen/SystemZ/atomicrmw-minmax-04.ll =================================================================== --- test/CodeGen/SystemZ/atomicrmw-minmax-04.ll +++ test/CodeGen/SystemZ/atomicrmw-minmax-04.ll @@ -7,13 +7,15 @@ define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { ; CHECK-LABEL: f1: ; CHECK: lg %r2, 0(%r3) +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: csg %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: lgr [[NEW:%r[0-9]+]], %r2 ; CHECK: cgrjle %r2, %r4, [[KEEP:\..*]] ; CHECK: lgr [[NEW]], %r4 -; CHECK: csg %r2, [[NEW]], 0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw min i64 *%src, i64 %b seq_cst ret i64 %res } @@ -22,13 +24,15 @@ define i64 @f2(i64 %dummy, i64 *%src, i64 %b) { ; CHECK-LABEL: f2: ; CHECK: lg %r2, 0(%r3) +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: csg %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: lgr [[NEW:%r[0-9]+]], %r2 ; CHECK: cgrjhe %r2, %r4, [[KEEP:\..*]] ; CHECK: lgr [[NEW]], %r4 -; CHECK: csg %r2, [[NEW]], 0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw max i64 *%src, i64 %b seq_cst ret i64 %res } @@ -37,13 +41,15 @@ define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { ; CHECK-LABEL: f3: ; CHECK: lg %r2, 0(%r3) +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: csg %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: lgr [[NEW:%r[0-9]+]], %r2 ; CHECK: clgrjle %r2, %r4, [[KEEP:\..*]] ; CHECK: lgr [[NEW]], %r4 -; CHECK: csg %r2, [[NEW]], 0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw umin i64 *%src, i64 %b seq_cst ret i64 %res } @@ -52,13 +58,15 @@ define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { ; CHECK-LABEL: f4: ; CHECK: lg %r2, 0(%r3) +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: csg %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: lgr [[NEW:%r[0-9]+]], %r2 ; CHECK: clgrjhe %r2, %r4, [[KEEP:\..*]] ; CHECK: lgr [[NEW]], %r4 -; CHECK: csg %r2, [[NEW]], 0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw umax i64 *%src, i64 %b seq_cst ret i64 %res } @@ -127,13 +135,15 @@ ; CHECK-LABEL: f10: ; CHECK: lghi [[LIMIT:%r[0-9]+]], 42 ; CHECK: lg %r2, 0(%r3) +; CHECK: j [[LOOP:\.[^:]*]] +; CHECK: [[BB1:\.[^:]*]]: +; CHECK: csg %r2, [[NEW:%r[0-9]+]], 0(%r3) +; CHECK: ber %r14 ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: lgr [[NEW:%r[0-9]+]], %r2 ; CHECK: cgrjle %r2, [[LIMIT]], [[KEEP:\..*]] ; CHECK: lghi [[NEW]], 42 -; CHECK: csg %r2, [[NEW]], 0(%r3) -; CHECK: ber %r14 -; CHECK: j [[LOOP]] +; CHECK: j [[BB1]] %res = atomicrmw min i64 *%ptr, i64 42 seq_cst ret i64 %res }