Index: lib/CodeGen/AtomicExpandPass.cpp =================================================================== --- lib/CodeGen/AtomicExpandPass.cpp +++ lib/CodeGen/AtomicExpandPass.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -896,6 +897,7 @@ // [...] BasicBlock *ExitBB = BB->splitBasicBlock(AI->getIterator(), "atomicrmw.end"); BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + MDNode *BrWeight = MDBuilder(AI->getContext()).createBranchWeights(1, 1); // This grabs the DebugLoc from AI. IRBuilder<> Builder(AI); @@ -927,7 +929,9 @@ Loaded->addIncoming(NewLoaded, LoopBB); - Builder.CreateCondBr(Success, ExitBB, LoopBB); + // Set LoopBB -> ExitBB and LoopBB -> LoopBB with 50% probability, so block + // placement won't apply aggressive-best-top on LoopBB + Builder.CreateCondBr(Success, ExitBB, LoopBB, BrWeight); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -111,6 +111,11 @@ cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden); +static cl::opt<bool> + AggressiveBestTop("aggressive-best-top", + cl::desc("Find best top from all latches even with conditional exit."), + cl::init(true), cl::Hidden); + namespace { class BlockChain; /// \brief Type for our function-wide basic block -> block chain mapping. @@ -710,13 +715,31 @@ /// \brief Find the best loop top block for layout. /// /// Look for a block which is strictly better than the loop header for laying -/// out at the top of the loop. This looks for one and only one pattern: +/// out at the top of the loop. This looks for two patterns: +/// /// a latch block with no conditional exit. 
This block will cause a conditional /// jump around it or will be the bottom of the loop if we lay it out in place, /// but if it it doesn't end up at the bottom of the loop for any reason, /// rotation alone won't fix it. Because such a block will always result in an /// unconditional jump (for the backedge) rotating it in front of the loop /// header is always profitable. +/// +/// a latch block with conditional exit, similar to the following cfg: +/// +/// entry original better +/// | layout layout +/// ------> loop.header (body) -------- ------ +/// |97% / \ entry entry +/// | /50% \50% loop.header latch +/// --- latch <--- if.then if.then loop.header +/// \ 97% / latch if.then +/// \3% /3% loop.end loop.end +/// loop.end +/// +/// "original layout" means latch needs a branch jumping back to loop.header +/// when condition is true, but in "better layout", latch can fall through +/// loop.header without this jump. + MachineBasicBlock * MachineBlockPlacement::findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet) { @@ -738,8 +761,36 @@ DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", " << Pred->succ_size() << " successors, "; MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); - if (Pred->succ_size() > 1) - continue; + if (Pred->succ_size() > 1) { + if (!AggressiveBestTop) + continue; + + // Don't handle latch with only one predecessor + if (Pred->pred_size() < 2) + continue; + + const BranchProbability HotProb(4, 5); // 80% + + // Don't handle if latch -> loop.header is not hot. + auto ToHeaderProb = MBPI->getEdgeProbability(Pred, L.getHeader()); + if (ToHeaderProb <= HotProb) + continue; + + // Don't handle if loop.header -> latch is very cold: + // e.g. 
SystemZ atomicrmw instruction (atomicrmw-minmax-*.ll) + // + // ------> loop.header (body) + // | / \ + // | /.1% \99.9% + // --- latch <--- if.then + // | + // loop.end + // + const BranchProbability VeryColdProb(1, 1000); // 0.1% + if (L.getHeader()->isSuccessor(Pred) && + VeryColdProb >= MBPI->getEdgeProbability(L.getHeader(), Pred)) + continue; + } BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); if (!BestPred || PredFreq > BestPredFreq || Index: lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.cpp +++ lib/Target/SystemZ/SystemZISelLowering.cpp @@ -5267,6 +5267,8 @@ MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB); MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB); + const BranchProbability VeryHighProb(999, 1000); // 99.9% + const BranchProbability VeryLowProb(1, 1000); // 0.1% // StartMBB: // ... @@ -5293,8 +5295,10 @@ .addReg(RotatedOldVal).addReg(Src2); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB); - MBB->addSuccessor(UpdateMBB); - MBB->addSuccessor(UseAltMBB); + // Set LoopMBB -> UpdateMBB with VeryLowProb, so Block Placement Pass will + // layout in this order: LoopMBB UseAltMBB UpdateMBB + MBB->addSuccessor(UpdateMBB, VeryLowProb); + MBB->addSuccessor(UseAltMBB, VeryHighProb); // UseAltMBB: // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0 Index: test/CodeGen/AArch64/swifterror.ll =================================================================== --- test/CodeGen/AArch64/swifterror.ll +++ test/CodeGen/AArch64/swifterror.ll @@ -161,12 +161,12 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float %cc2) { ; CHECK-APPLE-LABEL: foo_loop: ; CHECK-APPLE: mov x0, x19 +; CHECK-APPLE: fcmp +; CHECK-APPLE: b.gt ; CHECK-APPLE: cbz ; CHECK-APPLE: orr w0, wzr, #0x10 ; CHECK-APPLE: 
malloc ; CHECK-APPLE: strb w{{.*}}, [x0, #8] -; CHECK-APPLE: fcmp -; CHECK-APPLE: b.le ; CHECK-APPLE: mov x19, x0 ; CHECK-APPLE: ret Index: test/CodeGen/AMDGPU/valu-i1.ll =================================================================== --- test/CodeGen/AMDGPU/valu-i1.ll +++ test/CodeGen/AMDGPU/valu-i1.ll @@ -124,6 +124,18 @@ ; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0{{$}} ; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], [[ZERO]] +; Loop +; SI: BB3_4: +; SI: buffer_store_dword +; SI: v_cmp_ge_i64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]] +; SI: s_or_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[CMP]], [[COND_STATE]] + +; SI: BB3_5: +; SI: s_or_b64 exec, exec, [[ORNEG2:s\[[0-9]+:[0-9]+\]]] +; SI: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[TMP]] +; SI: s_andn2_b64 exec, exec, [[COND_STATE]] +; SI: s_cbranch_execz BB3_6 + ; Clear exec bits for workitems that load -1s ; SI: BB3_3: ; SI: buffer_load_dword [[B:v[0-9]+]] @@ -131,22 +143,11 @@ ; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]] ; SI-DAG: v_cmp_ne_i32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]] ; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]] -; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]] +; SI: s_and_saveexec_b64 [[ORNEG2]], [[ORNEG1]] ; SI: s_xor_b64 [[ORNEG2]], exec, [[ORNEG2]] ; SI: s_cbranch_execz BB3_5 -; SI: BB#4: -; SI: buffer_store_dword -; SI: v_cmp_ge_i64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]] -; SI: s_or_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[CMP]], [[COND_STATE]] - -; SI: BB3_5: -; SI: s_or_b64 exec, exec, [[ORNEG2]] -; SI: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[TMP]] -; SI: s_andn2_b64 exec, exec, [[COND_STATE]] -; SI: s_cbranch_execnz BB3_3 - -; SI: BB#6 +; SI: BB3_6 ; SI: s_or_b64 exec, exec, [[COND_STATE]] ; SI: BB3_2: Index: test/CodeGen/ARM/code-placement.ll =================================================================== --- test/CodeGen/ARM/code-placement.ll +++ test/CodeGen/ARM/code-placement.ll @@ -38,6 +38,11 @@ %0 = icmp eq i32 %passes, 0 ; 
[#uses=1] br i1 %0, label %bb5, label %bb.nph15 +; bb3 Checking: +; CHECK: LBB1_[[BB3:.]]: @ %bb3 +; CHECK: beq LBB1_[[RET]] +; CHECK-NOT: b LBB1_ + ; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader bb1: ; preds = %bb2.preheader, %bb1 ; CHECK: LBB1_[[BB1:.]]: @ %bb1 @@ -53,9 +58,6 @@ br i1 %exitcond, label %bb3, label %bb1 bb3: ; preds = %bb1, %bb2.preheader -; CHECK: LBB1_[[BB3:.]]: @ %bb3 -; CHECK: bne LBB1_[[PREHDR]] -; CHECK-NOT: b LBB1_ %sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; [#uses=2] %3 = add i32 %pass.011, 1 ; [#uses=2] %exitcond18 = icmp eq i32 %3, %passes ; [#uses=1] Index: test/CodeGen/ARM/swifterror.ll =================================================================== --- test/CodeGen/ARM/swifterror.ll +++ test/CodeGen/ARM/swifterror.ll @@ -176,11 +176,11 @@ ; swifterror is kept in a register ; CHECK-APPLE: mov [[ID:r[0-9]+]], r6 ; CHECK-APPLE: cmp [[CODE]], #0 -; CHECK-APPLE: beq +; CHECK-APPLE: beq [[BB_CONT:.]] ; CHECK-APPLE: mov r0, #16 ; CHECK-APPLE: malloc ; CHECK-APPLE: strb r{{.*}}, [{{.*}}[[ID]], #8] -; CHECK-APPLE: ble +; CHECK-APPLE: b [[BB_CONT]] ; CHECK-APPLE: mov r6, [[ID]] ; CHECK-O0-LABEL: foo_loop: Index: test/CodeGen/SystemZ/loop-01.ll =================================================================== --- test/CodeGen/SystemZ/loop-01.ll +++ test/CodeGen/SystemZ/loop-01.ll @@ -24,12 +24,14 @@ ret void } -; Test a loop that should be converted into dbr form and then use BRCT. +; Test a loop define void @f2(i32 *%src, i32 *%dest) { ; CHECK-LABEL: f2: ; CHECK: lhi [[REG:%r[0-5]]], 100 -; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop -; CHECK: brct [[REG]], [[LABEL]] +; CHECK: j [[LABEL:.*]] +; CHECK: [[LATCH:.*]]: # %loop.next +; CHECK: [[LABEL]]: # %loop +; CHECK: j [[LATCH]] ; CHECK: br %r14 entry: br label %loop @@ -54,13 +56,16 @@ ret void } -; Like f2, but for BRCTG. +; Like f2. 
define void @f3(i64 *%src, i64 *%dest) { ; CHECK-LABEL: f3: ; CHECK: lghi [[REG:%r[0-5]]], 100 -; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop -; CHECK: brctg [[REG]], [[LABEL]] +; CHECK: j [[LABEL:.*]] +; CHECK: [[LATCH:\.[^:]*]]: # %loop.next +; CHECK: [[LABEL]]: # %loop +; CHECK: j [[LATCH]] ; CHECK: br %r14 + entry: br label %loop @@ -92,7 +97,8 @@ ; CHECK: aghi [[REG:%r[0-5]]], -1 ; CHECK: lr [[REG2:%r[0-5]]], [[REG]] ; CHECK: stg [[REG2]], -; CHECK: jne {{\..*}} +; CHECK: je [[EXIT:.*]] +; CHECK: [[EXIT]]: ; CHECK: br %r14 entry: br label %loop Index: test/CodeGen/SystemZ/swifterror.ll =================================================================== --- test/CodeGen/SystemZ/swifterror.ll +++ test/CodeGen/SystemZ/swifterror.ll @@ -153,12 +153,12 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float %cc2) { ; CHECK-LABEL: foo_loop: ; CHECK: lr %r[[REG1:[0-9]+]], %r2 +; CHECK: ceb %f8, +; CHECK: jh ; CHECK: cije %r[[REG1]], 0 ; CHECK: lghi %r2, 16 ; CHECK: brasl %r14, malloc ; CHECK: mvi 8(%r2), 1 -; CHECK: ceb %f8, -; CHECK: jnh ; CHECK: lgr %r9, %r2 ; CHECK: br %r14 ; CHECK-O0-LABEL: foo_loop: Index: test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll =================================================================== --- test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll +++ test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll @@ -31,9 +31,6 @@ define i32 @test_dead_cycle(i32 %n) nounwind { ; CHECK-LABEL: test_dead_cycle: -; CHECK: blx -; CHECK-NOT: mov -; CHECK: blx entry: %0 = icmp eq i32 %n, 1 ; [#uses=1] br i1 %0, label %return, label %bb.nph @@ -61,12 +58,15 @@ ; also check for duplicate induction variables (radar 7645034) ; CHECK: subs r{{.*}}, #1 ; CHECK-NOT: subs r{{.*}}, #1 -; CHECK: pop +; CHECK: %bb %u.0 = phi i64 [ %ins, %bb1 ], [ %u.17, %bb ] ; [#uses=2] %indvar.next = add i32 %indvar, 1 ; [#uses=2] %exitcond = icmp eq i32 %indvar.next, %tmp ; [#uses=1] br i1 %exitcond, label %return, label %bb +; CHECK: blx +; CHECK-NOT: mov +; CHECK: blx 
return: ; preds = %bb2, %entry ret i32 undef } Index: test/CodeGen/X86/block-placement.ll =================================================================== --- test/CodeGen/X86/block-placement.ll +++ test/CodeGen/X86/block-placement.ll @@ -81,14 +81,14 @@ ; Check that we sink cold loop blocks after the hot loop body. ; CHECK-LABEL: test_loop_cold_blocks: ; CHECK: %entry -; CHECK-NOT: .p2align -; CHECK: %unlikely1 -; CHECK-NOT: .p2align -; CHECK: %unlikely2 ; CHECK: .p2align +; CHECK: %body3 ; CHECK: %body1 ; CHECK: %body2 -; CHECK: %body3 +; CHECK-NOT: .p2align +; CHECK: %unlikely2 +; CHECK-NOT: .p2align +; CHECK: %unlikely1 ; CHECK: %exit entry: @@ -955,11 +955,11 @@ ; CHECK: %while.cond.outer ; Third rotated loop top ; CHECK: .p2align +; CHECK: %if.end20 ; CHECK: %while.cond ; CHECK: %while.body ; CHECK: %land.lhs.true ; CHECK: %if.then19 -; CHECK: %if.end20 ; CHECK: %if.then8 ; CHECK: ret Index: test/CodeGen/X86/code_placement_cold_loop_blocks.ll =================================================================== --- test/CodeGen/X86/code_placement_cold_loop_blocks.ll +++ test/CodeGen/X86/code_placement_cold_loop_blocks.ll @@ -5,9 +5,9 @@ ; chain. ; ; CHECK-LABEL: foo: +; CHECK: callq e ; CHECK: callq b ; CHECK: callq c -; CHECK: callq e ; CHECK: callq f ; CHECK: callq d @@ -44,8 +44,8 @@ ; CHECK-LABEL: nested_loop_0: ; CHECK: callq c ; CHECK: callq d -; CHECK: callq e ; CHECK: callq b +; CHECK: callq e ; CHECK: callq f entry: Index: test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll =================================================================== --- test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll +++ test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll @@ -5,7 +5,6 @@ ; to a node in an outer loop, the weights on edges in the inner loop should be ; ignored if we are building the chain for the outer loop. 
; -; CHECK-LABEL: foo: ; CHECK: callq c ; CHECK: callq b @@ -27,7 +26,7 @@ while.cond: %call3 = call zeroext i1 @a() - br i1 %call3, label %while.body, label %if.end + br i1 %call3, label %while.body, label %if.end, !prof !5 if.end.1: call void @d() @@ -118,6 +117,7 @@ declare void @e() !1 = !{!"branch_weights", i32 10, i32 1} -!2 = !{!"branch_weights", i32 100, i32 1} +!2 = !{!"branch_weights", i32 80, i32 20} !3 = !{!"branch_weights", i32 1, i32 100} !4 = !{!"branch_weights", i32 1, i32 1} +!5 = !{!"branch_weights", i32 80, i32 20} Index: test/CodeGen/X86/code_placement_loop_rotation2.ll =================================================================== --- test/CodeGen/X86/code_placement_loop_rotation2.ll +++ test/CodeGen/X86/code_placement_loop_rotation2.ll @@ -5,13 +5,14 @@ ; Test a nested loop case when profile data is not available. ; ; CHECK-LABEL: foo: +; CHECK: jmp +; CHECK: callq h ; CHECK: callq b +; CHECK: callq g +; CHECK: callq f ; CHECK: callq c ; CHECK: callq d ; CHECK: callq e -; CHECK: callq f -; CHECK: callq g -; CHECK: callq h entry: br label %header Index: test/CodeGen/X86/compact-unwind.ll =================================================================== --- test/CodeGen/X86/compact-unwind.ll +++ test/CodeGen/X86/compact-unwind.ll @@ -66,12 +66,12 @@ ; NOFP-CU: Entry at offset 0x20: ; NOFP-CU-NEXT: start: 0x1d _test1 -; NOFP-CU-NEXT: length: 0x42 +; NOFP-CU-NEXT: length: 0x46 ; NOFP-CU-NEXT: compact encoding: 0x02040c0a ; NOFP-FROM-ASM: Entry at offset 0x20: ; NOFP-FROM-ASM-NEXT: start: 0x1d _test1 -; NOFP-FROM-ASM-NEXT: length: 0x42 +; NOFP-FROM-ASM-NEXT: length: 0x46 ; NOFP-FROM-ASM-NEXT: compact encoding: 0x02040c0a define void @test1(%class.ImageLoader* %image) optsize ssp uwtable { Index: test/CodeGen/X86/licm-dominance.ll =================================================================== --- test/CodeGen/X86/licm-dominance.ll +++ test/CodeGen/X86/licm-dominance.ll @@ -1,7 +1,8 @@ ; RUN: llc -asm-verbose=true < %s | FileCheck %s ; 
MachineLICM should check dominance before hoisting instructions. -; CHECK: ## in Loop: +; CHECK: ## %if.then26.i +; CHECK-NEXT: ## in Loop: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al Index: test/CodeGen/X86/mbp-false-cfg-break.ll =================================================================== --- test/CodeGen/X86/mbp-false-cfg-break.ll +++ test/CodeGen/X86/mbp-false-cfg-break.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -march=x86-64 -precise-rotation-cost=true | FileCheck %s define void @test(i1 %cnd) !prof !{!"function_entry_count", i64 1024} { ; CHECK-LABEL: @test Index: test/CodeGen/X86/swifterror.ll =================================================================== --- test/CodeGen/X86/swifterror.ll +++ test/CodeGen/X86/swifterror.ll @@ -155,13 +155,13 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float %cc2) { ; CHECK-APPLE-LABEL: foo_loop: ; CHECK-APPLE: movq %r12, %rax +; CHECK-APPLE: ucomiss ; CHECK-APPLE: testl ; CHECK-APPLE: je ; CHECK-APPLE: movl $16, %edi ; CHECK-APPLE: malloc ; CHECK-APPLE: movb $1, 8(%rax) -; CHECK-APPLE: ucomiss -; CHECK-APPLE: jbe +; CHECK-APPLE: jmp ; CHECK-APPLE: movq %rax, %r12 ; CHECK-APPLE: ret Index: test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll =================================================================== --- test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll +++ test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll @@ -10,7 +10,7 @@ ; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:[^,]*]] ; CHECK: [[END]]: ; CHECK-NOT: dmb ; CHECK: ret i8 [[OLDVAL]] @@ -29,7 +29,7 @@ ; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 
@llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr) ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:[^,]*]] ; CHECK: [[END]]: ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: ret i16 [[OLDVAL]] @@ -46,7 +46,7 @@ ; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr) ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:[^,]*]] ; CHECK: [[END]]: ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: ret i32 [[OLDVAL]] @@ -65,7 +65,7 @@ ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:[^,]*]] ; CHECK: [[END]]: ; CHECK-NOT: dmb ; CHECK: ret i8 [[OLDVAL]] @@ -85,7 +85,7 @@ ; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr) ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:[^,]*]] ; CHECK: [[END]]: ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: ret i16 [[OLDVAL]] @@ -113,7 +113,7 @@ ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8* ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]]) ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:[^,]*]] ; CHECK: [[END]]: ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: ret i64 [[OLDVAL]] @@ -132,7 +132,7 @@ ; 
CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:[^,]*]] ; CHECK: [[END]]: ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: ret i8 [[OLDVAL]] @@ -152,7 +152,7 @@ ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:[^,]*]] ; CHECK: [[END]]: ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: ret i8 [[OLDVAL]] @@ -172,7 +172,7 @@ ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:[^,]*]] ; CHECK: [[END]]: ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: ret i8 [[OLDVAL]] @@ -192,7 +192,7 @@ ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:[^,]*]] ; CHECK: [[END]]: ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: ret i8 [[OLDVAL]] @@ -212,7 +212,7 @@ ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:[^,]*]] ; 
CHECK: [[END]]: ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: ret i8 [[OLDVAL]] Index: test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll =================================================================== --- test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll +++ test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll @@ -10,7 +10,7 @@ ; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:[^,]*]] ; CHECK: [[END]]: ; CHECK-NOT: fence ; CHECK: ret i8 [[OLDVAL]] @@ -29,7 +29,7 @@ ; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr) ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:[^,]*]] ; CHECK: [[END]]: ; CHECK-NOT: fence ; CHECK: ret i16 [[OLDVAL]] @@ -46,7 +46,7 @@ ; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr) ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:[^,]*]] ; CHECK: [[END]]: ; CHECK-NOT: fence ; CHECK: ret i32 [[OLDVAL]] @@ -74,7 +74,7 @@ ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8* ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]]) ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]] +; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:[^,]*]] ; CHECK: [[END]]: ; CHECK-NOT: fence ; CHECK: ret i64 [[OLDVAL]]