Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -407,11 +407,15 @@ void buildCFGChains(); void optimizeBranches(); void alignBlocks(); + /// Returns true if a block should be tail-duplicated to increase fallthrough + /// opportunities. bool shouldTailDuplicate(MachineBasicBlock *BB); /// Check the edge frequencies to see if tail duplication will increase /// fallthroughs. bool probabilityJustifiesTailDuplicate( MachineBasicBlock *BB, MachineBasicBlock *Succ); + /// Returns true if a block can tail duplicate into all unplaced + /// predecessors. Filters based on loop. bool canTailDuplicateUnplacedPreds( MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &Chain, const BlockFilterSet *BlockFilter); @@ -576,7 +580,22 @@ return SuccProb; } -/// Check if a block should be tail duplicated. +/// Check if \p BB has exactly the successors in \p Successors. +static bool hasSameSuccessors( + MachineBasicBlock &BB, SmallPtrSetImpl<MachineBasicBlock *> &Successors) { + if (BB.succ_size() != Successors.size()) + return false; + // We don't want to count self-loops + if (Successors.count(&BB)) + return false; + for (MachineBasicBlock *Succ : BB.successors()) + if (!Successors.count(Succ)) + return false; + return true; +} + +/// Check if a block should be tail duplicated to increase fallthrough +/// opportunities. /// \p BB Block to check. 
bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) { // Blocks with single successors don't create additional fallthrough @@ -631,22 +650,23 @@ // | / | | // Succ Succ /| // / \ | \/ | - // U/ =V = /= = + // U/ =V | == | // / \ | / \| // D E D E // Cost in the first case is: P + V - // Cost in the second case is: Q + QV + PU + PV + // Cost in the second case is: Q + QU + PV if (Dom == nullptr || !Succ->isSuccessor(Dom)) { BranchProbability P = (MBPI->getEdgeProbability(BB, Succ)); BranchProbability Q = P.getCompl(); BranchProbability U = BestSuccSucc; BranchProbability V = U.getCompl(); - BranchProbability QV = Q * V; + BranchProbability PV = P * V; + BranchProbability QU = Q * U; uint64_t BaseCost = static_cast(P.getNumerator()) + static_cast(V.getNumerator()); uint64_t DupCost = static_cast(Q.getNumerator()) + - static_cast(QV.getNumerator()) + - static_cast(P.getNumerator()); + static_cast(QU.getNumerator()) + + static_cast(PV.getNumerator()); return (BaseCost > DupCost); } BranchProbability U = MBPI->getEdgeProbability(Succ, Dom); @@ -690,14 +710,54 @@ if (!shouldTailDuplicate(Succ)) return false; + // For CFG checking. + SmallPtrSet Successors(BB->succ_begin(), BB->succ_end()); for (MachineBasicBlock *Pred : Succ->predecessors()) { // Make sure all unplaced and unfiltered predecessors can be // tail-duplicated into. if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred)) || BlockToChain[Pred] == &Chain) continue; - if (!TailDup.canTailDuplicate(Succ, Pred)) + if (!TailDup.canTailDuplicate(Succ, Pred)) { + if (Successors.size() > 1 + && hasSameSuccessors(*Pred, Successors)) + // This looks like a tail-duplicated block. Skip it. + // We are attempting to identify the CFG that matches a tail-duplicated + // block, rather than keeping a list of blocks for 2 reasons: + // 1) Tail Merging during layout can cause layout to run again, and we + // need to try to be repeatable in that case. 
+ // 2) If the user code created a lattice outside of layout, we would + // also like to lay it out in a chain. + // By checking for the CFG rather than keeping track of the blocks that + // received a copy, we accomplish these 2 goals in addition to laying + // out chains of blocks that can be tail-duplicated sequentially. + // For example: + // A A + // |\ |\ + // | \ | \ + // | C | C+BB + // | / | | + // |/ | | + // BB => BB | + // |\ |\/| + // | \ |/\| + // | D | D + // | / | / + // |/ |/ + // Succ Succ + // + // After BB was duplicated into C, the layout looks like the one on the + // right. BB and C now have the same successors. When considering whether + // Succ can be duplicated into all its unplaced predecessors, we ignore C. + // This allows lattices to be laid out in 2 separate chains (A, BB, Succ...) + // and later (C, D...). This is a reasonable heuristic because it allows the + // creation of 2 fallthrough paths with links between them. + // As above we want to lay out the CFG on the right the same whether it + // was generated by duplication during layout, or by something before + // layout. 
+ continue; return false; + } } return true; } Index: test/CodeGen/AArch64/addsub.ll =================================================================== --- test/CodeGen/AArch64/addsub.ll +++ test/CodeGen/AArch64/addsub.ll @@ -140,12 +140,17 @@ test5: ; CHECK: cmn {{w[0-9]+}}, #444 -; CHECK: b.gt [[RET]] +; CHECK: b.le [[TEST6:.?LBB[0-9]+_[0-9]+]] %newval5 = add i32 %val, 4 store i32 %newval5, i32* @var_i32 %cmp_neg_uge = icmp sgt i32 %val2, -444 br i1 %cmp_neg_uge, label %ret, label %test6 +; CHECK: {{^}}[[RET]]: +; CHECK: ret +; CHECK: {{^}}[[TEST6]]: +; CHECK: ret + test6: %newval6 = add i32 %val, 5 store i32 %newval6, i32* @var_i32 Index: test/CodeGen/AArch64/arm64-atomic.ll =================================================================== --- test/CodeGen/AArch64/arm64-atomic.ll +++ test/CodeGen/AArch64/arm64-atomic.ll @@ -9,10 +9,10 @@ ; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] ; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] -; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: ret ; CHECK-NEXT: [[FAILBB]]: ; CHECK-NEXT: clrex -; CHECK-NEXT: [[EXITBB]]: +; CHECK-NEXT: ret %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire %val = extractvalue { i32, i1 } %pair, 0 ret i32 %val @@ -27,10 +27,12 @@ ; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] ; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], [[NEW]], [x0] ; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] -; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: mov x0, x[[ADDR]] +; CHECK-NEXT: ret ; CHECK-NEXT: [[FAILBB]]: ; CHECK-NEXT: clrex -; CHECK-NEXT: [[EXITBB]]: +; CHECK-NEXT: mov x0, x[[ADDR]] +; CHECK-NEXT: ret %new = load i32, i32* %pnew %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire %val = extractvalue { i32, i1 } %pair, 0 @@ -41,15 +43,15 @@ ; CHECK-LABEL: val_compare_and_swap_rel: ; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0 ; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: -; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]] +; CHECK-NEXT: ldaxr 
[[RESULT:w[0-9]+]], [x[[ADDR]]] ; CHECK-NEXT: cmp [[RESULT]], w1 ; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] -; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]] +; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] -; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: ret ; CHECK-NEXT: [[FAILBB]]: ; CHECK-NEXT: clrex -; CHECK-NEXT: [[EXITBB]]: +; CHECK-NEXT: ret %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic %val = extractvalue { i32, i1 } %pair, 0 ret i32 %val @@ -64,10 +66,10 @@ ; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] ; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], x2, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] -; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: ret ; CHECK-NEXT: [[FAILBB]]: ; CHECK-NEXT: clrex -; CHECK-NEXT: [[EXITBB]]: +; CHECK-NEXT: ret %pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic %val = extractvalue { i64, i1 } %pair, 0 ret i64 %val Index: test/CodeGen/AArch64/arm64-ccmp.ll =================================================================== --- test/CodeGen/AArch64/arm64-ccmp.ll +++ test/CodeGen/AArch64/arm64-ccmp.ll @@ -108,10 +108,10 @@ ; CHECK: cmp w0, #1 ; CHECK: sdiv [[DIVRES:w[0-9]+]], w1, w0 ; CHECK: ccmp [[DIVRES]], #16, #0, ge -; CHECK: b.gt [[BLOCK:LBB[0-9_]+]] -; CHECK: bl _foo -; CHECK: [[BLOCK]]: +; CHECK: b.le [[BLOCK:LBB[0-9_]+]] ; CHECK: orr w0, wzr, #0x7 +; CHECK: [[BLOCK]]: +; CHECK: bl _foo define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp { entry: %cmp = icmp sgt i32 %a, 0 @@ -135,7 +135,7 @@ ; CHECK: cmp ; CHECK-NOT: b. 
; CHECK: fccmp {{.*}}, #8, ge -; CHECK: b.lt +; CHECK: b.ge define i32 @single_fcmp(i32 %a, float %b) nounwind ssp { entry: %cmp = icmp sgt i32 %a, 0 Index: test/CodeGen/AArch64/arm64-shrink-wrapping.ll =================================================================== --- test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -346,19 +346,15 @@ ; CHECK-NEXT: sub w1, w1, #1 ; CHECK-NEXT: add [[SUM]], [[SUM]], [[VA_VAL]] ; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]] -; DISABLE-NEXT: b [[IFEND_LABEL]] -; -; DISABLE: [[ELSE_LABEL]]: ; %if.else -; DISABLE: lsl w0, w1, #1 -; -; CHECK: [[IFEND_LABEL]]: +; CHECK-NEXT: [[IFEND_LABEL]]: ; Epilogue code. ; CHECK: add sp, sp, #16 ; CHECK-NEXT: ret ; -; ENABLE: [[ELSE_LABEL]]: ; %if.else -; ENABLE-NEXT: lsl w0, w1, #1 -; ENABLE_NEXT: ret +; CHECK: [[ELSE_LABEL]]: ; %if.else +; CHECK-NEXT: lsl w0, w1, #1 +; DISABLE-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret define i32 @variadicFunc(i32 %cond, i32 %count, ...) #0 { entry: %ap = alloca i8*, align 8 Index: test/CodeGen/AArch64/branch-relax-cbz.ll =================================================================== --- test/CodeGen/AArch64/branch-relax-cbz.ll +++ test/CodeGen/AArch64/branch-relax-cbz.ll @@ -6,23 +6,18 @@ ; CHECK-NEXT: ; BB#1: ; %b3 ; CHECK: ldr [[LOAD:w[0-9]+]] -; CHECK: cbz [[LOAD]], [[SKIP_LONG_B:LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: b [[B8:LBB[0-9]+_[0-9]+]] - -; CHECK-NEXT: [[SKIP_LONG_B]]: +; CHECK: cbnz [[LOAD]], [[B8:LBB[0-9]+_[0-9]+]] ; CHECK-NEXT: b [[B7:LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: [[B8]]: ; %b8 +; CHECK-NEXT: ret + ; CHECK-NEXT: [[B2]]: ; %b2 ; CHECK: mov w{{[0-9]+}}, #93 ; CHECK: bl _extfunc ; CHECK: cbz w{{[0-9]+}}, [[B7]] +; CHECK-NEXT: b [[B8]] -; CHECK-NEXT: [[B8]]: ; %b8 -; CHECK-NEXT: ret - -; CHECK-NEXT: [[B7]]: ; %b7 -; CHECK: mov w{{[0-9]+}}, #13 -; CHECK: b _extfunc define void @split_block_no_fallthrough(i64 %val) #0 { bb: %c0 = icmp sgt i64 %val, -5 Index: test/CodeGen/AArch64/compare-branch.ll 
=================================================================== --- test/CodeGen/AArch64/compare-branch.ll +++ test/CodeGen/AArch64/compare-branch.ll @@ -27,7 +27,7 @@ %val4 = load volatile i64, i64* @var64 %tst4 = icmp ne i64 %val4, 0 br i1 %tst4, label %end, label %test5, !prof !1 -; CHECK: cbnz {{x[0-9]+}}, .LBB +; CHECK: cbz {{x[0-9]+}}, .LBB test5: store volatile i64 %val4, i64* @var64 Index: test/CodeGen/AArch64/logical_shifted_reg.ll =================================================================== --- test/CodeGen/AArch64/logical_shifted_reg.ll +++ test/CodeGen/AArch64/logical_shifted_reg.ll @@ -210,7 +210,7 @@ test3: ; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}, asr #12 -; CHECK: b.gt .L +; CHECK: b.le .L %asr_op = ashr i64 %val2, 12 %asr_and = and i64 %asr_op, %val1 %tst3 = icmp sgt i64 %asr_and, 0 Index: test/CodeGen/AArch64/optimize-cond-branch.ll =================================================================== --- test/CodeGen/AArch64/optimize-cond-branch.ll +++ test/CodeGen/AArch64/optimize-cond-branch.ll @@ -11,7 +11,8 @@ ; ; CHECK-LABEL: func ; CHECK-NOT: and -; CHECK: tbnz +; Layout reverses the test. 
+; CHECK: tbz define void @func() { %c0 = icmp sgt i64 0, 0 br i1 %c0, label %b1, label %b6 Index: test/CodeGen/AArch64/tbz-tbnz.ll =================================================================== --- test/CodeGen/AArch64/tbz-tbnz.ll +++ test/CodeGen/AArch64/tbz-tbnz.ll @@ -10,7 +10,7 @@ br i1 %cmp, label %if.then, label %if.end ; CHECK: sub [[CMP:w[0-9]+]], w0, #12 -; CHECK: tbz [[CMP]], #31 +; CHECK: tbnz [[CMP]], #31 if.then: call void @t() @@ -28,7 +28,7 @@ br i1 %cmp, label %if.then, label %if.end ; CHECK: sub [[CMP:x[0-9]+]], x0, #12 -; CHECK: tbz [[CMP]], #63 +; CHECK: tbnz [[CMP]], #63 if.then: call void @t() @@ -118,7 +118,7 @@ br i1 %cmp, label %if.then, label %if.end ; CHECK: sub [[CMP:w[0-9]+]], w0, #12 -; CHECK: tbz [[CMP]], #31 +; CHECK: tbnz [[CMP]], #31 if.then: call void @t() @@ -178,7 +178,7 @@ br i1 %tst, label %if.then, label %if.end ; CHECK-NOT: cmp -; CHECK: tbz x0, #63 +; CHECK: tbnz x0, #63 if.then: call void @t() @@ -194,7 +194,7 @@ br i1 %tst, label %if.then, label %if.end ; CHECK-NOT: cmp -; CHECK: tbz x0, #63 +; CHECK: tbnz x0, #63 if.then: call void @t() @@ -209,7 +209,7 @@ ; CHECK: ldr [[CMP:x[0-9]+]], [x1] ; CHECK-NOT: cmp -; CHECK: tbz [[CMP]], #63 +; CHECK: tbnz [[CMP]], #63 %val = load i64, i64* %ptr %tst = icmp slt i64 %val, 0 @@ -229,7 +229,7 @@ br i1 %tst, label %if.then, label %if.end ; CHECK-NOT: cmp -; CHECK: tbz x0, #63 +; CHECK: tbnz x0, #63 if.then: call void @t() @@ -247,7 +247,7 @@ ; CHECK: orr [[CMP:x[0-9]+]], x0, x1 ; CHECK-NOT: cmp -; CHECK: tbz [[CMP]], #63 +; CHECK: tbnz [[CMP]], #63 if.then: call void @t() @@ -262,7 +262,7 @@ br i1 %cond, label %if.end, label %if.then ; CHECK-NOT: and -; CHECK: tbnz w0, #0 +; CHECK: tbz w0, #0 if.then: call void @t() Index: test/CodeGen/AMDGPU/basic-branch.ll =================================================================== --- test/CodeGen/AMDGPU/basic-branch.ll +++ test/CodeGen/AMDGPU/basic-branch.ll @@ -8,13 +8,10 @@ ; GCNNOOPT: v_writelane_b32 ; GCN: s_cbranch_scc1 
[[END:BB[0-9]+_[0-9]+]] - -; GCN: ; BB#1 ; GCNNOOPT: v_readlane_b32 ; GCNNOOPT: v_readlane_b32 ; GCN: buffer_store_dword -; GCNOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; TODO: This waitcnt can be eliminated +; GCNNOOPT: s_endpgm ; GCN: {{^}}[[END]]: ; GCN: s_endpgm Index: test/CodeGen/AMDGPU/cf-loop-on-constant.ll =================================================================== --- test/CodeGen/AMDGPU/cf-loop-on-constant.ll +++ test/CodeGen/AMDGPU/cf-loop-on-constant.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs -O0 < %s ; GCN-LABEL: {{^}}test_loop: -; GCN: [[LABEL:BB[0-9+]_[0-9]+]]: +; GCN: [[LABEL:BB[0-9+]_[0-9]+]]: ; %for.body{{$}} ; GCN: ds_read_b32 ; GCN: ds_write_b32 ; GCN: s_branch [[LABEL]] Index: test/CodeGen/AMDGPU/convergent-inlineasm.ll =================================================================== --- test/CodeGen/AMDGPU/convergent-inlineasm.ll +++ test/CodeGen/AMDGPU/convergent-inlineasm.ll @@ -29,6 +29,7 @@ ; GCN: v_cmp_ne_u32_e64 ; GCN: BB{{[0-9]+_[0-9]+}}: + define void @nonconvergent_inlineasm(i64 addrspace(1)* nocapture %arg) { bb: %tmp = call i32 @llvm.amdgcn.workitem.id.x() Index: test/CodeGen/AMDGPU/salu-to-valu.ll =================================================================== --- test/CodeGen/AMDGPU/salu-to-valu.ll +++ test/CodeGen/AMDGPU/salu-to-valu.ll @@ -439,7 +439,7 @@ ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN-NOHSA: buffer_store_dword [[ONE]] ; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[ONE]] -; GCN; {{^}}[[EXIT]]: +; GCN: {{^}}[[EXIT]]: ; GCN: s_endpgm define void @sopc_vopc_legalize_bug(i32 %cond, i32 addrspace(1)* %out, i32 addrspace(1)* %in) { bb3: ; preds = %bb2 Index: test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll =================================================================== --- test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll +++ test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll @@ -37,7 +37,10 @@ ; OPT-NOT: call i1 @llvm.amdgcn.loop ; GCN-LABEL: {{^}}annotate_ret_noloop: -; GCN: 
s_cbranch_scc1 +; GCN: s_cbranch_scc0 [[BODY:BB[0-9]+_[0-9]+]] +; GCN: s_endpgm + +; GCN: {{^}}[[BODY]]: ; GCN: s_endpgm ; GCN: .Lfunc_end1 define void @annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 { Index: test/CodeGen/AMDGPU/uniform-cfg.ll =================================================================== --- test/CodeGen/AMDGPU/uniform-cfg.ll +++ test/CodeGen/AMDGPU/uniform-cfg.ll @@ -252,10 +252,12 @@ ; GCN: s_cmp_lt_i32 [[COND]], 1 ; GCN: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]] ; GCN: v_cmp_gt_i32_e64 vcc, [[COND]], 0{{$}} -; GCN: s_cbranch_vccnz [[EXIT]] -; GCN: buffer_store +; GCN: s_cbranch_vccz [[BODY:[A-Za-z0-9_]+]] ; GCN: {{^}}[[EXIT]]: ; GCN: s_endpgm +; GCN: {{^}}[[BODY]]: +; GCN: buffer_store +; GCN: s_endpgm define void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, i32 addrspace(1)* %out) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0 @@ -302,9 +304,10 @@ ; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}} ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc ; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]] -; GCN: s_cbranch_execz [[ENDIF_LABEL:[0-9_A-Za-z]+]] ; GCN: s_cmp_lg_u32 {{s[0-9]+}}, 0 -; GCN: s_cbranch_scc1 [[ENDIF_LABEL]] +; GCN: s_cbranch_scc0 [[IF_UNIFORM_LABEL:[A-Z0-9_a-z]+]] +; GCN: s_endpgm +; GCN: {{^}}[[IF_UNIFORM_LABEL]]: ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN: buffer_store_dword [[ONE]] define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) { @@ -328,14 +331,13 @@ ; GCN-LABEL: {{^}}divergent_inside_uniform: ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0 -; GCN: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]] +; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]] +; GCN: [[IF_LABEL]]: ; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}} ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc ; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]] ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN: buffer_store_dword [[ONE]] -; GCN: [[ENDIF_LABEL]]: -; GCN: s_endpgm 
define void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) { entry: %u_cmp = icmp eq i32 %cond, 0 @@ -363,11 +365,11 @@ ; GCN: buffer_store_dword [[ONE]] ; GCN: s_or_b64 exec, exec, [[MASK]] ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0 -; GCN: s_cbranch_scc1 [[EXIT:[A-Z0-9_]+]] +; GCN: s_cbranch_scc0 [[IF_UNIFORM:[A-Z0-9_]+]] +; GCN: s_endpgm +; GCN: [[IF_UNIFORM]]: ; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; GCN: buffer_store_dword [[TWO]] -; GCN: [[EXIT]]: -; GCN: s_endpgm define void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 @@ -398,16 +400,20 @@ ; GCN-LABEL: {{^}}cse_uniform_condition_different_blocks: ; GCN: s_load_dword [[COND:s[0-9]+]] ; GCN: s_cmp_lt_i32 [[COND]], 1 -; GCN: s_cbranch_scc1 BB[[FNNUM:[0-9]+]]_3 +; GCN: s_cbranch_scc1 [[FN:BB[0-9_]+]] ; GCN: BB#1: ; GCN-NOT: cmp ; GCN: buffer_load_dword ; GCN: buffer_store_dword -; GCN: s_cbranch_scc1 BB[[FNNUM]]_3 +; GCN: s_cbranch_scc0 [[BB7:BB[0-9_]+]] -; GCN: BB[[FNNUM]]_3: +; GCN: [[FN]]: ; GCN: s_endpgm + +; GCN: [[BB7]]: +; GCN: s_endpgm + define void @cse_uniform_condition_different_blocks(i32 %cond, i32 addrspace(1)* %out) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0 Index: test/CodeGen/ARM/arm-and-tst-peephole.ll =================================================================== --- test/CodeGen/ARM/arm-and-tst-peephole.ll +++ test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -49,9 +49,9 @@ ; V8-NEXT: beq ; V8-NEXT: %tailrecurse.switch ; V8: cmp -; V8-NEXT: bne -; V8-NEXT: b -; The trailing space in the last line checks that the branch is unconditional +; V8-NEXT: beq +; V8-NEXT: %sw.epilog +; V8-NEXT: bx lr switch i32 %and, label %sw.epilog [ i32 1, label %sw.bb i32 3, label %sw.bb6 Index: test/CodeGen/ARM/atomic-cmpxchg.ll =================================================================== --- test/CodeGen/ARM/atomic-cmpxchg.ll +++ test/CodeGen/ARM/atomic-cmpxchg.ll @@ -66,14 +66,14 @@ ; CHECK-ARMV7-NEXT: 
[[HEAD:.LBB[0-9_]+]]: ; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0] ; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0 -; CHECK-ARMV7-NEXT: moveq [[RES:r[0-9]+]], #1 +; CHECK-ARMV7-NEXT: moveq r0, #1 ; CHECK-ARMV7-NEXT: bxeq lr ; CHECK-ARMV7-NEXT: [[TRY]]: -; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0] -; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]] +; CHECK-ARMV7-NEXT: ldrexb [[SUCCESS]], [r0] +; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], r1 ; CHECK-ARMV7-NEXT: beq [[HEAD]] ; CHECK-ARMV7-NEXT: clrex -; CHECK-ARMV7-NEXT: mov [[RES]], #0 +; CHECK-ARMV7-NEXT: mov r0, #0 ; CHECK-ARMV7-NEXT: bx lr ; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8: Index: test/CodeGen/ARM/atomic-op.ll =================================================================== --- test/CodeGen/ARM/atomic-op.ll +++ test/CodeGen/ARM/atomic-op.ll @@ -320,10 +320,10 @@ ; CHECK: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]] ; CHECK: cmp [[SUCCESS]], #0 ; CHECK: bne [[LOOP_BB]] -; CHECK: b [[END_BB:\.?LBB[0-9]+_[0-9]+]] +; CHECK: dmb ish +; CHECK: bx lr ; CHECK: [[FAIL_BB]]: ; CHECK-NEXT: clrex -; CHECK-NEXT: [[END_BB]]: ; CHECK: dmb ish ; CHECK: bx lr Index: test/CodeGen/ARM/atomic-ops-v8.ll =================================================================== --- test/CodeGen/ARM/atomic-ops-v8.ll +++ test/CodeGen/ARM/atomic-ops-v8.ll @@ -1045,20 +1045,21 @@ ; function there. ; CHECK-ARM-NEXT: cmp r[[OLD]], r0 ; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]] -; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 ; CHECK-NEXT: BB#2: ; As above, r1 is a reasonable guess. 
; CHECK: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NEXT: b .LBB{{[0-9]+}}_4 -; CHECK-NEXT: .LBB{{[0-9]+}}_3: -; CHECK-NEXT: clrex +; CHECK-ARM: mov r0, r[[OLD]] +; CHECK: bx lr ; CHECK-NEXT: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: clrex ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK-ARM: mov r0, r[[OLD]] +; CHECK-ARM-NEXT: bx lr ret i8 %old } @@ -1078,20 +1079,21 @@ ; function there. ; CHECK-ARM-NEXT: cmp r[[OLD]], r0 ; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]] -; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 ; CHECK-NEXT: BB#2: ; As above, r1 is a reasonable guess. ; CHECK: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NEXT: b .LBB{{[0-9]+}}_4 -; CHECK-NEXT: .LBB{{[0-9]+}}_3: -; CHECK-NEXT: clrex +; CHECK-ARM: mov r0, r[[OLD]] +; CHECK: bx lr ; CHECK-NEXT: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: clrex ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK-ARM: mov r0, r[[OLD]] +; CHECK-ARM-NEXT: bx lr ret i16 %old } @@ -1110,20 +1112,21 @@ ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. ; CHECK-NEXT: cmp r[[OLD]], r0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 ; CHECK-NEXT: BB#2: ; As above, r1 is a reasonable guess. 
; CHECK: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NEXT: b .LBB{{[0-9]+}}_4 -; CHECK-NEXT: .LBB{{[0-9]+}}_3: -; CHECK-NEXT: clrex +; CHECK: str{{(.w)?}} r[[OLD]], +; CHECK-NEXT: bx lr ; CHECK-NEXT: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: clrex ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK: str{{(.w)?}} r[[OLD]], +; CHECK-ARM-NEXT: bx lr ret void } @@ -1148,16 +1151,16 @@ ; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0 ; CHECK-ARM-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]] ; CHECK-THUMB-BE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_LO]], [[MISMATCH_HI]] -; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 ; CHECK-NEXT: BB#2: ; As above, r2, r3 is a reasonable guess. ; CHECK: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NEXT: b .LBB{{[0-9]+}}_4 -; CHECK-NEXT: .LBB{{[0-9]+}}_3: -; CHECK-NEXT: clrex +; CHECK: strd [[OLD1]], [[OLD2]], [r[[ADDR]]] +; CHECK-NEXT: pop ; CHECK-NEXT: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: clrex ; CHECK-NOT: dmb ; CHECK-NOT: mcr Index: test/CodeGen/ARM/fold-stack-adjust.ll =================================================================== --- test/CodeGen/ARM/fold-stack-adjust.ll +++ test/CodeGen/ARM/fold-stack-adjust.ll @@ -135,7 +135,7 @@ ; Important to check for beginning of basic block, because if it gets ; if-converted the test is probably no longer checking what it should. 
-; CHECK: {{LBB[0-9]+_2}}: +; CHECK: %end ; CHECK-NEXT: vpop {d7, d8} ; CHECK-NEXT: pop {r4, pc} Index: test/CodeGen/ARM/machine-cse-cmp.ll =================================================================== --- test/CodeGen/ARM/machine-cse-cmp.ll +++ test/CodeGen/ARM/machine-cse-cmp.ll @@ -52,7 +52,7 @@ ; CHECK-LABEL: f3: ; CHECK-NOT: sub ; CHECK: cmp -; CHECK: blt +; CHECK: bge %0 = load i32, i32* %offset, align 4 %cmp = icmp slt i32 %0, %size %s = sub nsw i32 %0, %size Index: test/CodeGen/Mips/llvm-ir/ashr.ll =================================================================== --- test/CodeGen/Mips/llvm-ir/ashr.ll +++ test/CodeGen/Mips/llvm-ir/ashr.ll @@ -91,12 +91,13 @@ ; M2: sllv $[[T5:[0-9]+]], $[[T4]], $[[T3]] ; M2: or $3, $[[T3]], $[[T2]] ; M2: $[[BB0]]: - ; M2: beqz $[[T1]], $[[BB1:BB[0-9_]+]] + ; M2: bnez $[[T1]], $[[BB1:BB[0-9_]+]] ; M2: nop - ; M2: sra $2, $4, 31 - ; M2: $[[BB1]]: ; M2: jr $ra ; M2: nop + ; M2: $[[BB1]]: + ; M2: jr $ra + ; M2: sra $2, $4, 31 ; 32R1-R5: srlv $[[T0:[0-9]+]], $5, $7 ; 32R1-R5: not $[[T1:[0-9]+]], $7 @@ -177,12 +178,13 @@ ; M3: dsllv $[[T7:[0-9]+]], $[[T5]], $[[T6]] ; M3: or $3, $[[T7]], $[[T4]] ; M3: [[BB0]]: - ; M3: beqz $[[T3]], [[BB1:.LBB[0-9_]+]] + ; M3: bnez $[[T3]], [[BB1:.LBB[0-9_]+]] ; M3: nop - ; M3: dsra $2, $4, 63 - ; M3: [[BB1]]: ; M3: jr $ra ; M3: nop + ; M3: [[BB1]]: + ; M3: jr $ra + ; M3: dsra $2, $4, 63 ; GP64-NOT-R6: dsrlv $[[T0:[0-9]+]], $5, $7 ; GP64-NOT-R6: dsll $[[T1:[0-9]+]], $4, 1 Index: test/CodeGen/PowerPC/tail-dup-layout.ll =================================================================== --- test/CodeGen/PowerPC/tail-dup-layout.ll +++ test/CodeGen/PowerPC/tail-dup-layout.ll @@ -1,51 +1,51 @@ -; RUN: llc -outline-optional-branches -O2 < %s | FileCheck %s +; RUN: llc -O2 < %s | FileCheck %s target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-grtev4-linux-gnu" ; Intended layout: -; The outlining flag produces the layout +; The chain-based outlining produces the layout ; test1 ; 
test2 ; test3 ; test4 -; exit ; optional1 ; optional2 ; optional3 ; optional4 +; exit ; Tail duplication puts test n+1 at the end of optional n ; so optional1 includes a copy of test2 at the end, and branches ; to test3 (at the top) or falls through to optional 2. -; The CHECK statements check for the whole string of tests and exit block, +; The CHECK statements check for the whole string of tests ; and then check that the correct test has been duplicated into the end of ; the optional blocks and that the optional blocks are in the correct order. ;CHECK-LABEL: straight_test: ; test1 may have been merged with entry ;CHECK: mr [[TAGREG:[0-9]+]], 3 ;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 -;CHECK-NEXT: bc 12, 1, [[OPT1LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: [[TEST2LABEL:[._0-9A-Za-z]+]]: # %test2 +;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[._0-9A-Za-z]+]] +;CHECK-NEXT: # %test2 ;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 -;CHECK-NEXT: bne 0, [[OPT2LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: [[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3 +;CHECK-NEXT: bne 0, .[[OPT2LABEL:[._0-9A-Za-z]+]] +;CHECK-NEXT: .[[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3 ;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 ;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: [[TEST4LABEL:[._0-9A-Za-z]+]]: # %test4 +;CHECK-NEXT: .[[TEST4LABEL:[._0-9A-Za-z]+]]: # %test4 ;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28 -;CHECK-NEXT: bne 0, .[[OPT4LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit -;CHECK: blr -;CHECK-NEXT: [[OPT1LABEL]] +;CHECK-NEXT: beq 0, .[[EXITLABEL:[._0-9A-Za-z]+]] +;CHECK-NEXT: b .[[OPT4LABEL:[._0-9A-Za-z]+]] +;CHECK-NEXT: .[[OPT1LABEL]] ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 -;CHECK-NEXT: beq 0, [[TEST3LABEL]] -;CHECK-NEXT: [[OPT2LABEL]] +;CHECK-NEXT: beq 0, .[[TEST3LABEL]] +;CHECK-NEXT: .[[OPT2LABEL]] ;CHECK: rlwinm. 
{{[0-9]+}}, [[TAGREG]], 0, 29, 29 -;CHECK-NEXT: beq 0, [[TEST4LABEL]] -;CHECK-NEXT: [[OPT3LABEL]] +;CHECK-NEXT: beq 0, .[[TEST4LABEL]] +;CHECK-NEXT: .[[OPT3LABEL]] ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28 -;CHECK-NEXT: beq 0, [[EXITLABEL]] -;CHECK-NEXT: [[OPT4LABEL]] -;CHECK: b [[EXITLABEL]] +;CHECK-NEXT: beq 0, .[[EXITLABEL]] +;CHECK-NEXT: .[[OPT4LABEL]] +;CHECK: .[[EXITLABEL]]: # %exit +;CHECK: blr define void @straight_test(i32 %tag) { entry: @@ -53,7 +53,7 @@ test1: %tagbit1 = and i32 %tag, 1 %tagbit1eq0 = icmp eq i32 %tagbit1, 0 - br i1 %tagbit1eq0, label %test2, label %optional1 + br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1 optional1: call void @a() call void @a() @@ -63,7 +63,7 @@ test2: %tagbit2 = and i32 %tag, 2 %tagbit2eq0 = icmp eq i32 %tagbit2, 0 - br i1 %tagbit2eq0, label %test3, label %optional2 + br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1 optional2: call void @b() call void @b() @@ -73,7 +73,7 @@ test3: %tagbit3 = and i32 %tag, 4 %tagbit3eq0 = icmp eq i32 %tagbit3, 0 - br i1 %tagbit3eq0, label %test4, label %optional3 + br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1 optional3: call void @c() call void @c() @@ -83,7 +83,7 @@ test4: %tagbit4 = and i32 %tag, 8 %tagbit4eq0 = icmp eq i32 %tagbit4, 0 - br i1 %tagbit4eq0, label %exit, label %optional4 + br i1 %tagbit4eq0, label %exit, label %optional4, !prof !1 optional4: call void @d() call void @d() @@ -94,6 +94,113 @@ ret void } +; Intended layout: +; The chain-based outlining produces the layout +; entry +; --- Begin loop --- +; for.latch +; for.check +; test1 +; test2 +; test3 +; test4 +; optional1 +; optional2 +; optional3 +; optional4 +; --- End loop --- +; exit +; The CHECK statements check for the whole string of tests and exit block, +; and then check that the correct test has been duplicated into the end of +; the optional blocks and that the optional blocks are in the correct order. 
+;CHECK-LABEL: loop_test: +;CHECK: add [[TAGPTRREG:[0-9]+]], 3, 4 +;CHECK: .[[LATCHLABEL:[._0-9A-Za-z]+]]: # %for.latch +;CHECK: addi +;CHECK: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check +;CHECK: lwz [[TAGREG:[0-9]+]], 0([[TAGPTRREG]]) +;CHECK: # %test1 +;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 +;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[._0-9A-Za-z]+]] +;CHECK-NEXT: # %test2 +;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 +;CHECK-NEXT: bne 0, .[[OPT2LABEL:[._0-9A-Za-z]+]] +;CHECK-NEXT: .[[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3 +;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 +;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]] +;CHECK-NEXT: .[[TEST4LABEL:[._0-9A-Za-z]+]]: # %{{(test4|optional3)}} +;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28 +;CHECK-NEXT: beq 0, .[[LATCHLABEL]] +;CHECK-NEXT: b .[[OPT4LABEL:[._0-9A-Za-z]+]] +;CHECK: [[OPT1LABEL]] +;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 +;CHECK-NEXT: beq 0, .[[TEST3LABEL]] +;CHECK-NEXT: .[[OPT2LABEL]] +;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 +;CHECK-NEXT: beq 0, .[[TEST4LABEL]] +;CHECK-NEXT: .[[OPT3LABEL]] +;CHECK: rlwinm. 
{{[0-9]+}}, [[TAGREG]], 0, 28, 28 +;CHECK-NEXT: beq 0, .[[LATCHLABEL]] +;CHECK-NEXT: .[[OPT4LABEL]] +;CHECK: b .[[LATCHLABEL]] +define void @loop_test(i32* %tags, i32 %count) { +entry: + br label %for.check +for.check: + %count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch] + %done.count = icmp ugt i32 %count.loop, 0 + %tag_ptr = getelementptr inbounds i32, i32* %tags, i32 %count + %tag = load i32, i32* %tag_ptr + %done.tag = icmp eq i32 %tag, 0 + %done = and i1 %done.count, %done.tag + br i1 %done, label %test1, label %exit, !prof !1 +test1: + %tagbit1 = and i32 %tag, 1 + %tagbit1eq0 = icmp eq i32 %tagbit1, 0 + br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1 +optional1: + call void @a() + call void @a() + call void @a() + call void @a() + br label %test2 +test2: + %tagbit2 = and i32 %tag, 2 + %tagbit2eq0 = icmp eq i32 %tagbit2, 0 + br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1 +optional2: + call void @b() + call void @b() + call void @b() + call void @b() + br label %test3 +test3: + %tagbit3 = and i32 %tag, 4 + %tagbit3eq0 = icmp eq i32 %tagbit3, 0 + br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1 +optional3: + call void @c() + call void @c() + call void @c() + call void @c() + br label %test4 +test4: + %tagbit4 = and i32 %tag, 8 + %tagbit4eq0 = icmp eq i32 %tagbit4, 0 + br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1 +optional4: + call void @d() + call void @d() + call void @d() + call void @d() + br label %for.latch +for.latch: + %count.sub = sub i32 %count.loop, 1 + br label %for.check +exit: + ret void +} + ; The block then2 is not unavoidable, but since it can be tail-duplicated, it ; should be placed as a fallthrough from test2 and copied. ; CHECK-LABEL: avoidable_test: @@ -105,7 +212,6 @@ ; CHECK: # %then2 ; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29 ; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} -; CHECK: # %end2 ; CHECK: # %else1 ; CHECK: bl a ; CHECK: bl a @@ -113,6 +219,7 @@ ; CHECK: rlwinm. 
{{[0-9]+}}, {{[0-9]+}}, 0, 29, 29 ; CHECK: # %else2 ; CHECK: bl c +; CHECK: # %end2 define void @avoidable_test(i32 %tag) { entry: br label %test1 @@ -141,10 +248,9 @@ call void @d() ret void } - declare void @a() declare void @b() declare void @c() declare void @d() -!1 = !{!"branch_weights", i32 2, i32 1} +!1 = !{!"branch_weights", i32 5, i32 3} Index: test/CodeGen/SystemZ/asm-18.ll =================================================================== --- test/CodeGen/SystemZ/asm-18.ll +++ test/CodeGen/SystemZ/asm-18.ll @@ -297,7 +297,7 @@ ; CHECK: iihf [[REG]], 2102030405 ; CHECK: blah [[REG]] ; CHECK: br %r14 - %cmp = icmp eq i32 %x, 0 + %cmp = icmp ne i32 %x, 0 %val = select i1 %cmp, i32 0, i32 2102030405 call void asm sideeffect "blah $0", "h"(i32 %val) ret void @@ -311,7 +311,7 @@ ; CHECK: iilf [[REG]], 2102030405 ; CHECK: blah [[REG]] ; CHECK: br %r14 - %cmp = icmp eq i32 %x, 0 + %cmp = icmp ne i32 %x, 0 %val = select i1 %cmp, i32 0, i32 2102030405 call void asm sideeffect "blah $0", "r"(i32 %val) ret void Index: test/CodeGen/SystemZ/cond-store-01.ll =================================================================== --- test/CodeGen/SystemZ/cond-store-01.ll +++ test/CodeGen/SystemZ/cond-store-01.ll @@ -297,8 +297,11 @@ define void @f18(i8 *%ptr, i8 %alt, i32 %limit) { ; CHECK-LABEL: f18: ; CHECK: lb {{%r[0-5]}}, 0(%r2) -; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]] +; CHECK: stc {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 ; CHECK: [[LABEL]]: +; CHECK: lr {{%r[0-5]}}, {{%r[0-5]}} ; CHECK: stc {{%r[0-5]}}, 0(%r2) ; CHECK: br %r14 %cond = icmp ult i32 %limit, 420 @@ -331,8 +334,11 @@ ; FIXME: should use a normal load instead of CS. 
; CHECK-LABEL: f20: ; CHECK: lb {{%r[0-9]+}}, 0(%r2) -; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]] +; CHECK: stc {{%r[0-9]+}}, 0(%r2) +; CHECK: br %r14 ; CHECK: [[LABEL]]: +; CHECK: lr {{%r[0-5]}}, {{%r[0-5]}} ; CHECK: stc {{%r[0-9]+}}, 0(%r2) ; CHECK: br %r14 %cond = icmp ult i32 %limit, 420 Index: test/CodeGen/SystemZ/cond-store-02.ll =================================================================== --- test/CodeGen/SystemZ/cond-store-02.ll +++ test/CodeGen/SystemZ/cond-store-02.ll @@ -297,8 +297,11 @@ define void @f18(i16 *%ptr, i16 %alt, i32 %limit) { ; CHECK-LABEL: f18: ; CHECK: lh {{%r[0-5]}}, 0(%r2) -; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]] +; CHECK: sth {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 ; CHECK: [[LABEL]]: +; CHECK: lr {{%r[0-5]}}, {{%r[0-5]}} ; CHECK: sth {{%r[0-5]}}, 0(%r2) ; CHECK: br %r14 %cond = icmp ult i32 %limit, 420 @@ -331,8 +334,11 @@ ; FIXME: should use a normal load instead of CS. ; CHECK-LABEL: f20: ; CHECK: lh {{%r[0-9]+}}, 0(%r2) -; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]] +; CHECK: sth {{%r[0-9]+}}, 0(%r2) +; CHECK: br %r14 ; CHECK: [[LABEL]]: +; CHECK: lr {{%r[0-9]+}}, {{%r[0-9]+}} ; CHECK: sth {{%r[0-9]+}}, 0(%r2) ; CHECK: br %r14 %cond = icmp ult i32 %limit, 420 Index: test/CodeGen/SystemZ/cond-store-03.ll =================================================================== --- test/CodeGen/SystemZ/cond-store-03.ll +++ test/CodeGen/SystemZ/cond-store-03.ll @@ -226,8 +226,11 @@ define void @f14(i32 *%ptr, i32 %alt, i32 %limit) { ; CHECK-LABEL: f14: ; CHECK: l {{%r[0-5]}}, 0(%r2) -; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]] +; CHECK: st {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 ; CHECK: [[LABEL]]: +; CHECK: lr {{%r[0-5]}}, {{%r[0-5]}} ; CHECK: st {{%r[0-5]}}, 0(%r2) ; CHECK: br %r14 %cond = icmp ult i32 %limit, 420 @@ -260,8 +263,11 @@ ; FIXME: should use a normal load instead of CS. 
; CHECK-LABEL: f16: ; CHECK: l {{%r[0-5]}}, 0(%r2) -; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]] +; CHECK: st {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 ; CHECK: [[LABEL]]: +; CHECK: lr {{%r[0-5]}}, {{%r[0-5]}} ; CHECK: st {{%r[0-5]}}, 0(%r2) ; CHECK: br %r14 %cond = icmp ult i32 %limit, 420 Index: test/CodeGen/SystemZ/cond-store-04.ll =================================================================== --- test/CodeGen/SystemZ/cond-store-04.ll +++ test/CodeGen/SystemZ/cond-store-04.ll @@ -124,8 +124,11 @@ define void @f8(i64 *%ptr, i64 %alt, i32 %limit) { ; CHECK-LABEL: f8: ; CHECK: lg {{%r[0-5]}}, 0(%r2) -; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]] +; CHECK: stg {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 ; CHECK: [[LABEL]]: +; CHECK: lgr {{%r[0-5]}}, {{%r[0-5]}} ; CHECK: stg {{%r[0-5]}}, 0(%r2) ; CHECK: br %r14 %cond = icmp ult i32 %limit, 420 @@ -158,8 +161,11 @@ ; FIXME: should use a normal load instead of CSG. ; CHECK-LABEL: f10: ; CHECK: lg {{%r[0-5]}}, 0(%r2) -; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]] +; CHECK: stg {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 ; CHECK: [[LABEL]]: +; CHECK: lgr {{%r[0-5]}}, {{%r[0-5]}} ; CHECK: stg {{%r[0-5]}}, 0(%r2) ; CHECK: br %r14 %cond = icmp ult i32 %limit, 420 Index: test/CodeGen/SystemZ/cond-store-05.ll =================================================================== --- test/CodeGen/SystemZ/cond-store-05.ll +++ test/CodeGen/SystemZ/cond-store-05.ll @@ -156,8 +156,11 @@ define void @f10(float *%ptr, float %alt, i32 %limit) { ; CHECK-LABEL: f10: ; CHECK: le {{%f[0-5]}}, 0(%r2) -; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]] +; CHECK: ste {{%f[0-5]}}, 0(%r2) +; CHECK: br %r14 ; CHECK: [[LABEL]]: +; CHECK: ler {{%f[0-5]}}, {{%f[0-5]}} ; CHECK: ste {{%f[0-5]}}, 0(%r2) ; CHECK: br %r14 %cond = icmp ult i32 %limit, 420 Index: test/CodeGen/SystemZ/cond-store-06.ll 
=================================================================== --- test/CodeGen/SystemZ/cond-store-06.ll +++ test/CodeGen/SystemZ/cond-store-06.ll @@ -156,8 +156,11 @@ define void @f10(double *%ptr, double %alt, i32 %limit) { ; CHECK-LABEL: f10: ; CHECK: ld {{%f[0-5]}}, 0(%r2) -; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: {{jhe|jnhe}} [[LABEL:[^ ]*]] +; CHECK: std {{%f[0-5]}}, 0(%r2) +; CHECK: br %r14 ; CHECK: [[LABEL]]: +; CHECK: ldr {{%f[0-5]}}, {{%f[0-5]}} ; CHECK: std {{%f[0-5]}}, 0(%r2) ; CHECK: br %r14 %cond = icmp ult i32 %limit, 420 Index: test/CodeGen/SystemZ/int-cmp-48.ll =================================================================== --- test/CodeGen/SystemZ/int-cmp-48.ll +++ test/CodeGen/SystemZ/int-cmp-48.ll @@ -52,7 +52,7 @@ define double @f3(i8 *%src, double %a, double %b) { ; CHECK-LABEL: f3: ; CHECK: tm 0(%r2), 1 -; CHECK: je {{\.L.*}} +; CHECK: jne {{\.L.*}} ; CHECK: br %r14 %byte = load i8 , i8 *%src %and = and i8 %byte, 1 @@ -80,7 +80,7 @@ define double @f5(i8 *%src, double %a, double %b) { ; CHECK-LABEL: f5: ; CHECK: tm 0(%r2), 1 -; CHECK: jne {{\.L.*}} +; CHECK: je {{\.L.*}} ; CHECK: br %r14 %byte = load i8 , i8 *%src %and = and i8 %byte, 1 @@ -93,7 +93,7 @@ define double @f6(i8 *%src, double %a, double %b) { ; CHECK-LABEL: f6: ; CHECK: tm 0(%r2), 254 -; CHECK: jo {{\.L.*}} +; CHECK: jno {{\.L.*}} ; CHECK: br %r14 %byte = load i8 , i8 *%src %and = and i8 %byte, 254 @@ -106,7 +106,7 @@ define double @f7(i8 *%src, double %a, double %b) { ; CHECK-LABEL: f7: ; CHECK: tm 0(%r2), 254 -; CHECK: jno {{\.L.*}} +; CHECK: jo {{\.L.*}} ; CHECK: br %r14 %byte = load i8 , i8 *%src %and = and i8 %byte, 254 @@ -121,7 +121,7 @@ ; CHECK-LABEL: f8: ; CHECK: llc [[REG:%r[0-5]]], 0(%r2) ; CHECK: tmll [[REG]], 3 -; CHECK: jh {{\.L.*}} +; CHECK: jnh {{\.L.*}} ; CHECK: br %r14 %byte = load i8 , i8 *%src %and = and i8 %byte, 3 @@ -135,7 +135,7 @@ ; CHECK-LABEL: f9: ; CHECK: llc [[REG:%r[0-5]]], 0(%r2) ; CHECK: tmll [[REG]], 3 -; CHECK: jl {{\.L.*}} +; CHECK: 
jnl {{\.L.*}} ; CHECK: br %r14 %byte = load i8 , i8 *%src %and = and i8 %byte, 3 @@ -148,7 +148,7 @@ define double @f10(i8 *%src, double %a, double %b) { ; CHECK-LABEL: f10: ; CHECK: tm 4095(%r2), 1 -; CHECK: je {{\.L.*}} +; CHECK: jne {{\.L.*}} ; CHECK: br %r14 %ptr = getelementptr i8, i8 *%src, i64 4095 %byte = load i8 , i8 *%ptr @@ -162,7 +162,7 @@ define double @f11(i8 *%src, double %a, double %b) { ; CHECK-LABEL: f11: ; CHECK: tmy 4096(%r2), 1 -; CHECK: je {{\.L.*}} +; CHECK: jne {{\.L.*}} ; CHECK: br %r14 %ptr = getelementptr i8, i8 *%src, i64 4096 %byte = load i8 , i8 *%ptr @@ -176,7 +176,7 @@ define double @f12(i8 *%src, double %a, double %b) { ; CHECK-LABEL: f12: ; CHECK: tmy 524287(%r2), 1 -; CHECK: je {{\.L.*}} +; CHECK: jne {{\.L.*}} ; CHECK: br %r14 %ptr = getelementptr i8, i8 *%src, i64 524287 %byte = load i8 , i8 *%ptr @@ -191,7 +191,7 @@ ; CHECK-LABEL: f13: ; CHECK: agfi %r2, 524288 ; CHECK: tm 0(%r2), 1 -; CHECK: je {{\.L.*}} +; CHECK: jne {{\.L.*}} ; CHECK: br %r14 %ptr = getelementptr i8, i8 *%src, i64 524288 %byte = load i8 , i8 *%ptr @@ -205,7 +205,7 @@ define double @f14(i8 *%src, double %a, double %b) { ; CHECK-LABEL: f14: ; CHECK: tmy -524288(%r2), 1 -; CHECK: je {{\.L.*}} +; CHECK: jne {{\.L.*}} ; CHECK: br %r14 %ptr = getelementptr i8, i8 *%src, i64 -524288 %byte = load i8 , i8 *%ptr @@ -220,7 +220,7 @@ ; CHECK-LABEL: f15: ; CHECK: agfi %r2, -524289 ; CHECK: tm 0(%r2), 1 -; CHECK: je {{\.L.*}} +; CHECK: jne {{\.L.*}} ; CHECK: br %r14 %ptr = getelementptr i8, i8 *%src, i64 -524289 %byte = load i8 , i8 *%ptr @@ -234,7 +234,7 @@ define double @f16(i8 *%src, i64 %index, double %a, double %b) { ; CHECK-LABEL: f16: ; CHECK: tm 0({{%r[1-5]}}), 1 -; CHECK: je {{\.L.*}} +; CHECK: jne {{\.L.*}} ; CHECK: br %r14 %ptr = getelementptr i8, i8 *%src, i64 %index %byte = load i8 , i8 *%ptr Index: test/CodeGen/SystemZ/tdc-06.ll =================================================================== --- test/CodeGen/SystemZ/tdc-06.ll +++ 
test/CodeGen/SystemZ/tdc-06.ll @@ -26,25 +26,27 @@ nonzeroord: ; CHECK: lhi %r2, 2 ; CHECK: tcdb %f0, 48 -; CHECK: jl [[RET]] +; CHECK: je [[FINITE:.]] %abs = tail call double @llvm.fabs.f64(double %x) %testinf = fcmp oeq double %abs, 0x7FF0000000000000 br i1 %testinf, label %ret, label %finite, !prof !1 +ret: +; CHECK: [[RET]]: +; CHECK: br %r14 + %res = phi i32 [ 5, %entry ], [ 1, %nonzero ], [ 2, %nonzeroord ], [ %finres, %finite ] + ret i32 %res + finite: ; CHECK: lhi %r2, 3 ; CHECK: tcdb %f0, 831 ; CHECK: blr %r14 ; CHECK: lhi %r2, 4 +; CHECK: br %r14 %testnormal = fcmp uge double %abs, 0x10000000000000 %finres = select i1 %testnormal, i32 3, i32 4 br label %ret -ret: -; CHECK: [[RET]]: -; CHECK: br %r14 - %res = phi i32 [ 5, %entry ], [ 1, %nonzero ], [ 2, %nonzeroord ], [ %finres, %finite ] - ret i32 %res } !1 = !{!"branch_weights", i32 1, i32 1} Index: test/CodeGen/WebAssembly/mem-intrinsics.ll =================================================================== --- test/CodeGen/WebAssembly/mem-intrinsics.ll +++ test/CodeGen/WebAssembly/mem-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -tail-dup-placement=0| FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -tail-dup-placement=0 | FileCheck %s ; Test memcpy, memmove, and memset intrinsics. 
Index: test/CodeGen/X86/avx-splat.ll =================================================================== --- test/CodeGen/X86/avx-splat.ll +++ test/CodeGen/X86/avx-splat.ll @@ -62,8 +62,10 @@ ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ## implicit-def: %YMM0 ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne LBB4_2 -; CHECK-NEXT: ## BB#1: ## %load.i1247 +; CHECK-NEXT: je LBB4_1 +; CHECK-NEXT: ## BB#2: ## %__load_and_broadcast_32.exit1249 +; CHECK-NEXT: retq +; CHECK-NEXT: LBB4_1: ## %load.i1247 ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: andq $-32, %rsp @@ -71,7 +73,6 @@ ; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0 ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp -; CHECK-NEXT: LBB4_2: ## %__load_and_broadcast_32.exit1249 ; CHECK-NEXT: retq allocas: %udx495 = alloca [18 x [18 x float]], align 32 Index: test/CodeGen/X86/block-placement.ll =================================================================== --- test/CodeGen/X86/block-placement.ll +++ test/CodeGen/X86/block-placement.ll @@ -314,7 +314,7 @@ define void @unnatural_cfg1() { ; Test that we can handle a loop with an inner unnatural loop at the end of ; a function. This is a gross CFG reduced out of the single source GCC. -; CHECK: unnatural_cfg1 +; CHECK-LABEL: unnatural_cfg1 ; CHECK: %entry ; CHECK: %loop.body1 ; CHECK: %loop.body2 @@ -352,7 +352,11 @@ ; Test that we can handle a loop with a nested natural loop *and* an unnatural ; loop. This was reduced from a crash on block placement when run over ; single-source GCC. 
-; CHECK: unnatural_cfg2 +; The tail-duplication outlining algorithm places +; %loop.body3 and %loop.inner1.begin out-of-line at the end of the loop, +; because %loop.body4 is unavoidable within the loop and short, +; and %loop.inner1.begin has an alternate fallthrough of %loop.body3 +; CHECK-LABEL: unnatural_cfg2 ; CHECK: %entry ; CHECK: %loop.body1 ; CHECK: %loop.body2 @@ -559,7 +563,7 @@ ; didn't correctly locate the fallthrough successor, assuming blindly that the ; first one was the fallthrough successor. As a result, we would add an ; erroneous jump to the landing pad thinking *that* was the default successor. -; CHECK: test_eh_lpad_successor +; CHECK-LABEL: test_eh_lpad_successor ; CHECK: %entry ; CHECK-NOT: jmp ; CHECK: %loop @@ -587,7 +591,7 @@ ; fallthrough simply won't occur. Make sure we don't crash trying to update ; terminators for such constructs. ; -; CHECK: test_eh_throw +; CHECK-LABEL: test_eh_throw ; CHECK: %entry ; CHECK: %cleanup @@ -609,7 +613,7 @@ ; attempt to merge onto the wrong end of the inner loop just because we find it ; first. This was reduced from a crasher in GCC's single source. ; -; CHECK: test_unnatural_cfg_backwards_inner_loop +; CHECK-LABEL: test_unnatural_cfg_backwards_inner_loop ; CHECK: %entry ; CHECK: %loop2b ; CHECK: %loop1 @@ -649,7 +653,7 @@ ; fallthrough because that happens to always produce unanalyzable branches on ; x86. ; -; CHECK: unanalyzable_branch_to_loop_header +; CHECK-LABEL: unanalyzable_branch_to_loop_header ; CHECK: %entry ; CHECK: %loop ; CHECK: %exit @@ -673,7 +677,7 @@ ; This branch is now analyzable and hence the destination block becomes the ; hotter one. The right order is entry->bar->exit->foo. ; -; CHECK: unanalyzable_branch_to_best_succ +; CHECK-LABEL: unanalyzable_branch_to_best_succ ; CHECK: %entry ; CHECK: %bar ; CHECK: %exit @@ -699,7 +703,7 @@ ; Ensure that we can handle unanalyzable branches where the destination block ; gets selected as the best free block in the CFG. 
; -; CHECK: unanalyzable_branch_to_free_block +; CHECK-LABEL: unanalyzable_branch_to_free_block ; CHECK: %entry ; CHECK: %a ; CHECK: %b @@ -729,7 +733,7 @@ ; Ensure that we don't crash as we're building up many unanalyzable branches, ; blocks, and loops. ; -; CHECK: many_unanalyzable_branches +; CHECK-LABEL: many_unanalyzable_branches ; CHECK: %entry ; CHECK: %exit @@ -948,7 +952,7 @@ ; strange layouts that are siginificantly less efficient, often times maing ; it discontiguous. ; -; CHECK: @benchmark_heapsort +; CHECK-LABEL: @benchmark_heapsort ; CHECK: %entry ; First rotated loop top. ; CHECK: .p2align Index: test/CodeGen/X86/critical-edge-split-2.ll =================================================================== --- test/CodeGen/X86/critical-edge-split-2.ll +++ test/CodeGen/X86/critical-edge-split-2.ll @@ -24,6 +24,7 @@ ; CHECK-LABEL: test1: ; CHECK: testb %dil, %dil -; CHECK: jne LBB0_2 +; CHECK: je LBB0_1 +; CHECK: retq +; CHECK: LBB0_1: ; CHECK: divl -; CHECK: LBB0_2: Index: test/CodeGen/X86/shift-double.ll =================================================================== --- test/CodeGen/X86/shift-double.ll +++ test/CodeGen/X86/shift-double.ll @@ -14,11 +14,13 @@ ; CHECK-NEXT: shll %cl, %eax ; CHECK-NEXT: shldl %cl, %esi, %edx ; CHECK-NEXT: testb $32, %cl -; CHECK-NEXT: je .LBB0_2 -; CHECK-NEXT: # BB#1: +; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: # BB#2: +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_1: ; CHECK-NEXT: movl %eax, %edx ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: popl %esi ; CHECK-NEXT: retl %shift.upgrd.1 = zext i8 %C to i64 ; [#uses=1] @@ -37,12 +39,14 @@ ; CHECK-NEXT: sarl %cl, %edx ; CHECK-NEXT: shrdl %cl, %esi, %eax ; CHECK-NEXT: testb $32, %cl -; CHECK-NEXT: je .LBB1_2 -; CHECK-NEXT: # BB#1: +; CHECK-NEXT: jne .LBB1_1 +; CHECK-NEXT: # BB#2: +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB1_1: ; CHECK-NEXT: sarl $31, %esi ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: movl %esi, 
%edx -; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: popl %esi ; CHECK-NEXT: retl %shift.upgrd.2 = zext i8 %C to i64 ; [#uses=1] @@ -61,11 +65,13 @@ ; CHECK-NEXT: shrl %cl, %edx ; CHECK-NEXT: shrdl %cl, %esi, %eax ; CHECK-NEXT: testb $32, %cl -; CHECK-NEXT: je .LBB2_2 -; CHECK-NEXT: # BB#1: +; CHECK-NEXT: jne .LBB2_1 +; CHECK-NEXT: # BB#2: +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB2_1: ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: popl %esi ; CHECK-NEXT: retl %shift.upgrd.3 = zext i8 %C to i64 ; [#uses=1] Index: test/CodeGen/X86/sink-hoist.ll =================================================================== --- test/CodeGen/X86/sink-hoist.ll +++ test/CodeGen/X86/sink-hoist.ll @@ -26,7 +26,8 @@ ; CHECK-LABEL: split: ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: je +; CHECK-NEXT: jne +; CHECK: ret ; CHECK: divsd ; CHECK: movapd ; CHECK: ret Index: test/CodeGen/X86/sse-scalar-fp-arith.ll =================================================================== --- test/CodeGen/X86/sse-scalar-fp-arith.ll +++ test/CodeGen/X86/sse-scalar-fp-arith.ll @@ -1110,10 +1110,12 @@ ; AVX1-LABEL: add_ss_mask: ; AVX1: # BB#0: ; AVX1-NEXT: testb $1, %dil -; AVX1-NEXT: je .LBB62_2 -; AVX1-NEXT: # BB#1: +; AVX1-NEXT: jne .LBB62_1 +; AVX1-NEXT: # BB#2: +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] +; AVX1-NEXT: retq +; AVX1-NEXT: .LBB62_1: ; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: .LBB62_2: ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] ; AVX1-NEXT: retq ; @@ -1165,10 +1167,12 @@ ; AVX1-LABEL: add_sd_mask: ; AVX1: # BB#0: ; AVX1-NEXT: testb $1, %dil -; AVX1-NEXT: je .LBB63_2 -; AVX1-NEXT: # BB#1: +; AVX1-NEXT: jne .LBB63_1 +; AVX1-NEXT: # BB#2: +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1] +; AVX1-NEXT: retq +; AVX1-NEXT: .LBB63_1: ; AVX1-NEXT: vaddsd %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: .LBB63_2: ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; AVX1-NEXT: retq ; Index: 
test/CodeGen/X86/tail-dup-merge-loop-headers.ll =================================================================== --- test/CodeGen/X86/tail-dup-merge-loop-headers.ll +++ test/CodeGen/X86/tail-dup-merge-loop-headers.ll @@ -6,13 +6,13 @@ ; CHECK-LABEL: tail_dup_merge_loops ; CHECK: # %entry ; CHECK-NOT: # %{{[a-zA-Z_]+}} +; CHECK: # %exit +; CHECK-NOT: # %{{[a-zA-Z_]+}} ; CHECK: # %inner_loop_exit ; CHECK-NOT: # %{{[a-zA-Z_]+}} ; CHECK: # %inner_loop_latch ; CHECK-NOT: # %{{[a-zA-Z_]+}} ; CHECK: # %inner_loop_test -; CHECK-NOT: # %{{[a-zA-Z_]+}} -; CHECK: # %exit define void @tail_dup_merge_loops(i32 %a, i8* %b, i8* %c) local_unnamed_addr #0 { entry: %notlhs674.i = icmp eq i32 %a, 0 Index: test/CodeGen/X86/tail-dup-repeat.ll =================================================================== --- test/CodeGen/X86/tail-dup-repeat.ll +++ test/CodeGen/X86/tail-dup-repeat.ll @@ -1,4 +1,4 @@ -; RUN: llc -O2 -tail-dup-placement-threshold=4 -o - %s | FileCheck %s +; RUN: llc -O3 -tail-dup-placement-threshold=4 -o - %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/CodeGen/X86/tail-opts.ll =================================================================== --- test/CodeGen/X86/tail-opts.ll +++ test/CodeGen/X86/tail-opts.ll @@ -112,14 +112,13 @@ ; CHECK: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}} ; CHECK-NEXT: jbe .LBB2_3 ; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}} -; CHECK-NEXT: ja .LBB2_4 -; CHECK-NEXT: jmp .LBB2_2 -; CHECK-NEXT: .LBB2_3: -; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}} ; CHECK-NEXT: jbe .LBB2_2 ; CHECK-NEXT: .LBB2_4: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_3: +; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}} +; CHECK-NEXT: ja .LBB2_4 ; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: ret Index: test/CodeGen/X86/twoaddr-coalesce-3.ll =================================================================== --- 
test/CodeGen/X86/twoaddr-coalesce-3.ll +++ test/CodeGen/X86/twoaddr-coalesce-3.ll @@ -19,7 +19,7 @@ ; Check that only one mov will be generated in the kernel loop. ; CHECK-LABEL: foo: -; CHECK: [[LOOP1:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body +; CHECK: [[LOOP1:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body{{$}} ; CHECK-NOT: mov ; CHECK: movl {{.*}}, [[REG1:%[a-z0-9]+]] ; CHECK-NOT: mov @@ -56,7 +56,7 @@ ; Check that only two mov will be generated in the kernel loop. ; CHECK-LABEL: goo: -; CHECK: [[LOOP2:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body +; CHECK: [[LOOP2:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body{{$}} ; CHECK-NOT: mov ; CHECK: movl {{.*}}, [[REG2:%[a-z0-9]+]] ; CHECK-NOT: mov