Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -1072,6 +1072,10 @@ if (!shouldTailDuplicate(Succ)) return false; + // The result of canTailDuplicate. + bool Duplicate = true; + // Number of possible duplications. + unsigned int NumDup = 0; // For CFG checking. SmallPtrSet Successors(BB->succ_begin(), BB->succ_end()); @@ -1118,9 +1122,46 @@ // to trellises created by tail-duplication, so we just look for the // CFG. continue; - return false; + Duplicate = false; + continue; } + NumDup++; } + + // No possible duplication in current filter set. + if (NumDup == 0) + return false; + + // This is mainly for function exit BB. + // The integrated tail duplication is really designed for increasing + // fallthrough from predecessors of Succ to its successors. We may need + // other mechanisms to handle different cases. + if (Succ->succ_size() == 0) + return true; + + // Plus the already placed predecessor. + NumDup++; + + // If the duplication candidate has more unplaced predecessors than + // successors, the extra duplications can't bring more fallthrough. + // + // Pred1 Pred2 Pred3 + // \ | / + // \ | / + // \ | / + // Dup + // / \ + // / \ + // Succ1 Succ2 + // + // In this example Dup has 2 successors and 3 predecessors, duplication of Dup + // can increase the fallthrough from Pred1 to Succ1 and from Pred2 to Succ2, + // but the duplication into Pred3 can't increase fallthrough. + // + // A small number of extra duplications may not hurt too much. We need a better + // heuristic to handle it. 
+ if ((NumDup > Succ->succ_size()) || !Duplicate) + return false; return true; } @@ -1416,9 +1457,10 @@ bool BadCFGConflict = false; for (MachineBasicBlock *Pred : Succ->predecessors()) { - if (Pred == Succ || BlockToChain[Pred] == &SuccChain || + BlockChain *PredChain = BlockToChain[Pred]; + if (Pred == Succ || PredChain == &SuccChain || (BlockFilter && !BlockFilter->count(Pred)) || - BlockToChain[Pred] == &Chain || + PredChain == &Chain || Pred != *std::prev(PredChain->end()) || // This check is redundant except for look ahead. This function is // called for lookahead by isProfitableToTailDup when BB hasn't been // placed yet. @@ -1720,7 +1762,9 @@ MachineBasicBlock* BestSucc = Result.BB; bool ShouldTailDup = Result.ShouldTailDup; if (allowTailDupPlacement()) - ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc)); + ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(BB, BestSucc, + Chain, + BlockFilter)); // If an immediate successor isn't available, look for the best viable // block among those we've identified as not violating the loop's CFG at Index: test/CodeGen/AArch64/swifterror.ll =================================================================== --- test/CodeGen/AArch64/swifterror.ll +++ test/CodeGen/AArch64/swifterror.ll @@ -162,12 +162,12 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float %cc2) { ; CHECK-APPLE-LABEL: foo_loop: ; CHECK-APPLE: mov x0, x21 +; CHECK-APPLE: fcmp +; CHECK-APPLE: b.gt ; CHECK-APPLE: cbz ; CHECK-APPLE: mov w0, #16 ; CHECK-APPLE: malloc ; CHECK-APPLE: strb w{{.*}}, [x0, #8] -; CHECK-APPLE: fcmp -; CHECK-APPLE: b.le ; CHECK-APPLE: mov x21, x0 ; CHECK-APPLE: ret Index: test/CodeGen/AArch64/tbz-tbnz.ll =================================================================== --- test/CodeGen/AArch64/tbz-tbnz.ll +++ test/CodeGen/AArch64/tbz-tbnz.ll @@ -153,7 +153,7 @@ br i1 %tst3, label %if.then3, label %if.end ; CHECK: tst x0, x1, lsl #63 -; CHECK: b.ge +; CHECK: b.lt if.then3: 
%shifted_op2 = shl i64 %val2, 62 Index: test/CodeGen/AMDGPU/uniform-cfg.ll =================================================================== --- test/CodeGen/AMDGPU/uniform-cfg.ll +++ test/CodeGen/AMDGPU/uniform-cfg.ll @@ -330,13 +330,14 @@ ; GCN-LABEL: {{^}}divergent_inside_uniform: ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0 -; GCN: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]] +; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]] +; GCN: [[ENDIF_LABEL:[0-9_A-Za-z]+]]: +; GCN: [[IF_LABEL]]: ; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}} ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc ; GCN: ; mask branch [[ENDIF_LABEL]] ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN: buffer_store_dword [[ONE]] -; GCN: [[ENDIF_LABEL]]: ; GCN: s_endpgm define amdgpu_kernel void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) { entry: Index: test/CodeGen/PowerPC/branch-opt.ll =================================================================== --- test/CodeGen/PowerPC/branch-opt.ll +++ test/CodeGen/PowerPC/branch-opt.ll @@ -8,13 +8,11 @@ ; The last (whichever it is) should have a fallthrough exit, and the other three ; need an unconditional branch. No other block should have an unconditional ; branch to cond_next48 -; One of the blocks ends up with a loop exit block that gets a tail-duplicated copy -; of %cond_next48, so there should only be two unconditional branches. 
-;CHECK: b .LBB0_13 -;CHECK: b .LBB0_13 -;CHECK-NOT: b .LBB0_13 -;CHECK: .LBB0_13: # %cond_next48 +;CHECK: .LBB0_7: # %cond_next48 +;CHECK: b .LBB0_7 +;CHECK: b .LBB0_7 +;CHECK: b .LBB0_7 define void @foo(i32 %W, i32 %X, i32 %Y, i32 %Z) { entry: Index: test/CodeGen/PowerPC/expand-contiguous-isel.ll =================================================================== --- test/CodeGen/PowerPC/expand-contiguous-isel.ll +++ test/CodeGen/PowerPC/expand-contiguous-isel.ll @@ -137,6 +137,7 @@ ; CHECK: bc 12, eq, [[TRUE:.LBB[0-9]+]] ; CHECK-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] ; CHECK-NEXT: [[TRUE]] +; CHECK-NEXT: # in Loop: Header ; CHECK-NEXT: addi {{r[0-9]+}}, {{r[0-9]+}}, 0 ; CHECK-NEXT: [[SUCCESSOR]] } Index: test/CodeGen/PowerPC/no-duplicate.ll =================================================================== --- test/CodeGen/PowerPC/no-duplicate.ll +++ test/CodeGen/PowerPC/no-duplicate.ll @@ -0,0 +1,91 @@ +; RUN: llc -O2 < %s | FileCheck %s + +target triple = "powerpc64le-grtev4-linux-gnu" + +; No duplication of loop header into entry block. +define void @no_duplicate1(i64 %a) { +; CHECK-LABEL: no_duplicate1 +; CHECK: mr 30, 3 +; CHECK-NEXT: b .LBB0_2 + +; CHECK: .LBB0_2: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cmpldi 30, 100 +; CHECK-NEXT: bne 0, .LBB0_1 +entry: + br label %header + +header: + %ind = phi i64 [%a, %entry], [%val3, %latch] + %cond1 = icmp eq i64 %ind, 100 + br i1 %cond1, label %middle, label %latch + +middle: + %condx = call i1 @foo() + %val1 = xor i64 %ind, 2 + br label %latch + +latch: + %val2 = phi i64 [%ind, %header], [%val1, %middle] + %val3 = add i64 %val2, 1 + %cond2 = call i1 @foo() + br i1 %cond2, label %end, label %header + +end: + ret void +} + +; No duplication of loop header into latches. 
+define void @no_duplicate2(i64 %a) { +; CHECK-LABEL: no_duplicate2 +; CHECK: mr 30, 3 +; CHECK-NEXT: b .LBB1_2 + +; CHECK: .LBB1_2: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cmpldi 30, 100 +; CHECK-NEXT: bne 0, .LBB1_1 + +; CHECK: %latch2 +; CHECK: b .LBB1_2 + +; CHECK: %latch3 +; CHECK: b .LBB1_2 +entry: + br label %header + +header: + %ind = phi i64 [%a, %entry], [%val1, %latch1], [%val2, %latch2], [%val2, %latch3] + %cond1 = icmp eq i64 %ind, 100 + br i1 %cond1, label %middle1, label %latch1 + +latch1: + %cond2 = call i1 @foo() + %val1 = xor i64 %ind, 2 + br i1 %cond2, label %end, label %header + +middle1: + %cond3 = call i1 @foo() + br i1 %cond3, label %latch1, label %middle2 + +middle2: + %cond4 = call i1 @foo() + %val2 = add i64 %ind, 1 + br i1 %cond4, label %latch2, label %latch3 + +latch2: + call void @a() + br label %header + +latch3: + call void @b() + br label %header + +end: + ret void +} + + +declare i1 @foo() +declare void @a() +declare void @b() Index: test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll =================================================================== --- test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll +++ test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll @@ -31,9 +31,14 @@ define i32 @test_dead_cycle(i32 %n) nounwind { ; CHECK-LABEL: test_dead_cycle: +; CHECK: subs +; also check for duplicate induction variables (radar 7645034) +; CHECK: subs r{{.*}}, #1 +; CHECK-NOT: subs r{{.*}}, #1 ; CHECK: bl ; CHECK-NOT: mov ; CHECK: bl +; CHECK: pop entry: %0 = icmp eq i32 %n, 1 ; [#uses=1] br i1 %0, label %return, label %bb.nph @@ -58,10 +63,6 @@ br label %bb2 bb2: ; preds = %bb1, %bb -; also check for duplicate induction variables (radar 7645034) -; CHECK: subs r{{.*}}, #1 -; CHECK-NOT: subs r{{.*}}, #1 -; CHECK: pop %u.0 = phi i64 [ %ins, %bb1 ], [ %u.17, %bb ] ; [#uses=2] %indvar.next = add i32 %indvar, 1 ; [#uses=2] %exitcond = icmp eq i32 %indvar.next, %tmp ; [#uses=1] Index: test/CodeGen/Thumb2/cbnz.ll 
=================================================================== --- test/CodeGen/Thumb2/cbnz.ll +++ test/CodeGen/Thumb2/cbnz.ll @@ -5,7 +5,7 @@ define void @f(i32 %x, i32 %y) { ; CHECK-LABEL: f: - ; CHECK: cbnz + ; CHECK: cbz %p = icmp eq i32 %x, 0 br i1 %p, label %t, label %f @@ -26,7 +26,7 @@ call void @x() call void @x() call void @x() - ; CHECK: cbz + ; CHECK: bne %q = icmp eq i32 %y, 0 br i1 %q, label %t2, label %f Index: test/CodeGen/X86/mmx-coalescing.ll =================================================================== --- test/CodeGen/X86/mmx-coalescing.ll +++ test/CodeGen/X86/mmx-coalescing.ll @@ -16,17 +16,14 @@ ; CHECK-NEXT: # %bb.2: # %if.B ; CHECK-NEXT: pshufw $238, %mm0, %mm0 # mm0 = mm0[2,3,2,3] ; CHECK-NEXT: movq %mm0, %rax -; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jne .LBB0_4 +; CHECK-NEXT: jmp .LBB0_3 ; CHECK-NEXT: .LBB0_1: # %if.A -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movd %edx, %mm1 ; CHECK-NEXT: psllq %mm1, %mm0 ; CHECK-NEXT: movq %mm0, %rax ; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: jne .LBB0_4 -; CHECK-NEXT: # %bb.3: # %if.C -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: .LBB0_3: # %if.C ; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: je .LBB0_1 ; CHECK-NEXT: .LBB0_4: # %merge Index: test/CodeGen/X86/pr38795.ll =================================================================== --- test/CodeGen/X86/pr38795.ll +++ test/CodeGen/X86/pr38795.ll @@ -70,8 +70,15 @@ ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: movb %dh, %dl ; CHECK-NEXT: jne .LBB0_16 +; CHECK-NEXT: jmp .LBB0_6 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: # %bb.6: # %for.cond35 +; CHECK-NEXT: .LBB0_3: # %if.then +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movl $.str, (%esp) +; CHECK-NEXT: calll printf +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload +; CHECK-NEXT: # implicit-def: $eax +; CHECK-NEXT: .LBB0_6: # %for.cond35 ; CHECK-NEXT: # in Loop: Header=BB0_1 
Depth=1 ; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: je .LBB0_7 @@ -96,22 +103,10 @@ ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: jmp .LBB0_20 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_3: # %if.then -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl $.str, (%esp) -; CHECK-NEXT: calll printf -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload -; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jne .LBB0_11 -; CHECK-NEXT: jmp .LBB0_7 -; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_8: # %if.end21 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: je .LBB0_13 -; CHECK-NEXT: jmp .LBB0_10 +; CHECK-NEXT: jmp .LBB0_9 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: xorl %edi, %edi @@ -127,11 +122,11 @@ ; CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2 ; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne .LBB0_20 -; CHECK-NEXT: # %bb.9: # %ae +; CHECK-NEXT: .LBB0_9: # %ae ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne .LBB0_10 -; CHECK-NEXT: .LBB0_13: # %if.end26 +; CHECK-NEXT: # %bb.13: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testb %dl, %dl Index: test/CodeGen/X86/ragreedy-hoist-spill.ll =================================================================== --- test/CodeGen/X86/ragreedy-hoist-spill.ll +++ test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -114,7 +114,7 @@ ; CHECK-NEXT: jle LBB0_22 ; CHECK-NEXT: LBB0_13: ## %while.body200 ; CHECK-NEXT: ## =>This Loop Header: Depth=1 -; CHECK-NEXT: ## Child Loop BB0_30 Depth 2 +; CHECK-NEXT: ## Child Loop BB0_29 Depth 2 ; CHECK-NEXT: ## Child Loop BB0_38 Depth 2 ; CHECK-NEXT: leal -268(%r14), %eax ; CHECK-NEXT: cmpl $105, %eax @@ -159,27 +159,25 @@ ; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader ; CHECK-NEXT: ## in Loop: 
Header=BB0_13 Depth=1 ; CHECK-NEXT: ## implicit-def: $rax -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jns LBB0_30 -; CHECK-NEXT: jmp LBB0_55 +; CHECK-NEXT: jmp LBB0_29 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_32: ## %do.body479.backedge -; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 +; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 ; CHECK-NEXT: leaq 1(%r12), %rax ; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: je LBB0_33 -; CHECK-NEXT: ## %bb.29: ## %land.rhs485 -; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: js LBB0_55 -; CHECK-NEXT: LBB0_30: ## %cond.true.i.i2780 +; CHECK-NEXT: LBB0_29: ## %land.rhs485 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: js LBB0_55 +; CHECK-NEXT: ## %bb.30: ## %cond.true.i.i2780 +; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 ; CHECK-NEXT: movq %rax, %r12 ; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: ## %bb.31: ## %lor.rhs500 -; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 +; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 ; CHECK-NEXT: movl $256, %esi ## imm = 0x100 ; CHECK-NEXT: callq ___maskrune ; CHECK-NEXT: xorl %edx, %edx Index: test/CodeGen/X86/reverse_branches.ll =================================================================== --- test/CodeGen/X86/reverse_branches.ll +++ test/CodeGen/X86/reverse_branches.ll @@ -36,24 +36,24 @@ ; CHECK-NEXT: xorl %r12d, %r12d ; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %r14 ; CHECK-NEXT: movq %rsp, %r15 -; CHECK-NEXT: cmpl $999, %r12d ## imm = 0x3E7 -; CHECK-NEXT: jle LBB0_2 -; CHECK-NEXT: jmp LBB0_7 +; CHECK-NEXT: jmp LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_6: ## %for.inc9 -; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: incl %r12d -; CHECK-NEXT: cmpl $999, %r12d ## imm = 0x3E7 -; CHECK-NEXT: jg LBB0_7 -; CHECK-NEXT: LBB0_2: ## %for.cond1.preheader +; 
CHECK-NEXT: LBB0_1: ## %for.cond ; CHECK-NEXT: ## =>This Loop Header: Depth=1 ; CHECK-NEXT: ## Child Loop BB0_3 Depth 2 +; CHECK-NEXT: cmpl $999, %r12d ## imm = 0x3E7 +; CHECK-NEXT: jg LBB0_7 +; CHECK-NEXT: ## %bb.2: ## %for.cond1.preheader +; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $-1, %r13d ; CHECK-NEXT: movq %r15, %rbx ; CHECK-NEXT: movq %r14, %rbp ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_3: ## %for.cond1 -; CHECK-NEXT: ## Parent Loop BB0_2 Depth=1 +; CHECK-NEXT: ## Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 ; CHECK-NEXT: incl %r13d ; CHECK-NEXT: cmpl $999, %r13d ## imm = 0x3E7 @@ -74,47 +74,45 @@ ; CHECK-NEXT: callq _puts ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: movq %rsp, %rcx -; CHECK-NEXT: cmpl $999, %eax ## imm = 0x3E7 -; CHECK-NEXT: jle LBB0_9 -; CHECK-NEXT: jmp LBB0_16 +; CHECK-NEXT: jmp LBB0_8 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_15: ## %for.inc38 -; CHECK-NEXT: ## in Loop: Header=BB0_9 Depth=1 +; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1 ; CHECK-NEXT: incl %eax -; CHECK-NEXT: cmpl $999, %eax ## imm = 0x3E7 -; CHECK-NEXT: jg LBB0_16 -; CHECK-NEXT: LBB0_9: ## %for.cond18.preheader +; CHECK-NEXT: LBB0_8: ## %for.cond14 ; CHECK-NEXT: ## =>This Loop Header: Depth=1 -; CHECK-NEXT: ## Child Loop BB0_11 Depth 2 +; CHECK-NEXT: ## Child Loop BB0_10 Depth 2 ; CHECK-NEXT: ## Child Loop BB0_12 Depth 3 +; CHECK-NEXT: cmpl $999, %eax ## imm = 0x3E7 +; CHECK-NEXT: jg LBB0_16 +; CHECK-NEXT: ## %bb.9: ## %for.cond18.preheader +; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1 ; CHECK-NEXT: movq %rcx, %rdx ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: cmpl $999, %edi ## imm = 0x3E7 -; CHECK-NEXT: jle LBB0_11 -; CHECK-NEXT: jmp LBB0_15 +; CHECK-NEXT: jmp LBB0_10 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_14: ## %exit -; CHECK-NEXT: ## in Loop: Header=BB0_11 Depth=2 +; CHECK-NEXT: ## in Loop: Header=BB0_10 Depth=2 ; CHECK-NEXT: addq %rsi, %rbp ; 
CHECK-NEXT: incq %rdi ; CHECK-NEXT: decq %rsi ; CHECK-NEXT: addq $1001, %rdx ## imm = 0x3E9 ; CHECK-NEXT: cmpq $-1000, %rbp ## imm = 0xFC18 ; CHECK-NEXT: jne LBB0_5 -; CHECK-NEXT: ## %bb.10: ## %for.cond18 -; CHECK-NEXT: ## in Loop: Header=BB0_11 Depth=2 -; CHECK-NEXT: cmpl $999, %edi ## imm = 0x3E7 -; CHECK-NEXT: jg LBB0_15 -; CHECK-NEXT: LBB0_11: ## %for.body20 -; CHECK-NEXT: ## Parent Loop BB0_9 Depth=1 +; CHECK-NEXT: LBB0_10: ## %for.cond18 +; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1 ; CHECK-NEXT: ## => This Loop Header: Depth=2 ; CHECK-NEXT: ## Child Loop BB0_12 Depth 3 +; CHECK-NEXT: cmpl $999, %edi ## imm = 0x3E7 +; CHECK-NEXT: jg LBB0_15 +; CHECK-NEXT: ## %bb.11: ## %for.body20 +; CHECK-NEXT: ## in Loop: Header=BB0_10 Depth=2 ; CHECK-NEXT: movq $-1000, %rbp ## imm = 0xFC18 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_12: ## %do.body.i -; CHECK-NEXT: ## Parent Loop BB0_9 Depth=1 -; CHECK-NEXT: ## Parent Loop BB0_11 Depth=2 +; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1 +; CHECK-NEXT: ## Parent Loop BB0_10 Depth=2 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=3 ; CHECK-NEXT: cmpb $120, 1000(%rdx,%rbp) ; CHECK-NEXT: je LBB0_14 Index: test/CodeGen/X86/shadow-stack.ll =================================================================== --- test/CodeGen/X86/shadow-stack.ll +++ test/CodeGen/X86/shadow-stack.ll @@ -141,15 +141,14 @@ ; X86_64-NEXT: #EH_SjLj_Setup LBB1_4 ; X86_64-NEXT: ## %bb.1: ## %entry ; X86_64-NEXT: xorl %eax, %eax -; X86_64-NEXT: testl %eax, %eax -; X86_64-NEXT: jne LBB1_3 -; X86_64-NEXT: jmp LBB1_5 +; X86_64-NEXT: jmp LBB1_2 ; X86_64-NEXT: LBB1_4: ## Block address taken ; X86_64-NEXT: ## %entry ; X86_64-NEXT: movl $1, %eax +; X86_64-NEXT: LBB1_2: ## %entry ; X86_64-NEXT: testl %eax, %eax ; X86_64-NEXT: je LBB1_5 -; X86_64-NEXT: LBB1_3: ## %if.end +; X86_64-NEXT: ## %bb.3: ## %if.end ; X86_64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload ; X86_64-NEXT: shll $2, %eax ; X86_64-NEXT: leal (%rax,%rax,2), %eax @@ -190,15 +189,14 @@ 
; X86-NEXT: #EH_SjLj_Setup LBB1_4 ; X86-NEXT: ## %bb.1: ## %entry ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: testl %eax, %eax -; X86-NEXT: jne LBB1_3 -; X86-NEXT: jmp LBB1_5 +; X86-NEXT: jmp LBB1_2 ; X86-NEXT: LBB1_4: ## Block address taken ; X86-NEXT: ## %entry ; X86-NEXT: movl $1, %eax +; X86-NEXT: LBB1_2: ## %entry ; X86-NEXT: testl %eax, %eax ; X86-NEXT: je LBB1_5 -; X86-NEXT: LBB1_3: ## %if.end +; X86-NEXT: ## %bb.3: ## %if.end ; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: shll $2, %eax ; X86-NEXT: leal (%eax,%eax,2), %eax Index: test/CodeGen/X86/speculative-load-hardening.ll =================================================================== --- test/CodeGen/X86/speculative-load-hardening.ll +++ test/CodeGen/X86/speculative-load-hardening.ll @@ -411,8 +411,18 @@ ; X64-LFENCE-NEXT: pushq %rbx ; X64-LFENCE-NEXT: pushq %rax ; X64-LFENCE-NEXT: testl %edi, %edi -; X64-LFENCE-NEXT: jne .LBB3_6 -; X64-LFENCE-NEXT: # %bb.1: # %l1.header.preheader +; X64-LFENCE-NEXT: je .LBB3_1 +; X64-LFENCE-NEXT: .LBB3_6: # %exit +; X64-LFENCE-NEXT: lfence +; X64-LFENCE-NEXT: addq $8, %rsp +; X64-LFENCE-NEXT: popq %rbx +; X64-LFENCE-NEXT: popq %r12 +; X64-LFENCE-NEXT: popq %r13 +; X64-LFENCE-NEXT: popq %r14 +; X64-LFENCE-NEXT: popq %r15 +; X64-LFENCE-NEXT: popq %rbp +; X64-LFENCE-NEXT: retq +; X64-LFENCE-NEXT: .LBB3_1: # %l1.header.preheader ; X64-LFENCE-NEXT: movq %r8, %r14 ; X64-LFENCE-NEXT: movq %rcx, %rbx ; X64-LFENCE-NEXT: movl %edx, %r13d @@ -452,16 +462,6 @@ ; X64-LFENCE-NEXT: cmpl %r13d, %ebp ; X64-LFENCE-NEXT: jl .LBB3_4 ; X64-LFENCE-NEXT: jmp .LBB3_5 -; X64-LFENCE-NEXT: .LBB3_6: # %exit -; X64-LFENCE-NEXT: lfence -; X64-LFENCE-NEXT: addq $8, %rsp -; X64-LFENCE-NEXT: popq %rbx -; X64-LFENCE-NEXT: popq %r12 -; X64-LFENCE-NEXT: popq %r13 -; X64-LFENCE-NEXT: popq %r14 -; X64-LFENCE-NEXT: popq %r15 -; X64-LFENCE-NEXT: popq %rbp -; X64-LFENCE-NEXT: retq entry: %a.cmp = icmp eq i32 %a, 0 br i1 %a.cmp, label %l1.header, label %exit Index: test/CodeGen/X86/tail-dup-merge-loop-headers.ll 
=================================================================== --- test/CodeGen/X86/tail-dup-merge-loop-headers.ll +++ test/CodeGen/X86/tail-dup-merge-loop-headers.ll @@ -5,22 +5,23 @@ define void @tail_dup_merge_loops(i32 %a, i8* %b, i8* %c) local_unnamed_addr #0 { ; CHECK-LABEL: tail_dup_merge_loops: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: jmp .LBB0_5 +; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_3: # %inner_loop_exit -; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: incq %rsi +; CHECK-NEXT: .LBB0_1: # %outer_loop_top +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB0_4 Depth 2 ; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: je .LBB0_5 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_2: # %inner_loop_top -; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB0_4 Depth 2 +; CHECK-NEXT: # %bb.2: # %inner_loop_top +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: cmpb $0, (%rsi) ; CHECK-NEXT: js .LBB0_3 ; CHECK-NEXT: .LBB0_4: # %inner_loop_latch -; CHECK-NEXT: # Parent Loop BB0_2 Depth=1 +; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: addq $2, %rsi ; CHECK-NEXT: cmpb $0, (%rsi) @@ -97,7 +98,7 @@ ; CHECK-NEXT: movl $1, %ebx ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB1_26 +; CHECK-NEXT: jne .LBB1_27 ; CHECK-NEXT: # %bb.1: # %if.end19 ; CHECK-NEXT: movl %esi, %r13d ; CHECK-NEXT: movq %rdi, %r12 @@ -108,101 +109,106 @@ ; CHECK-NEXT: movq %r15, %rdi ; CHECK-NEXT: callq cli_calloc ; CHECK-NEXT: testl %r13d, %r13d -; CHECK-NEXT: je .LBB1_25 +; CHECK-NEXT: je .LBB1_26 ; CHECK-NEXT: # %bb.2: # %if.end19 ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: je .LBB1_25 +; CHECK-NEXT: je .LBB1_26 ; CHECK-NEXT: # %bb.3: # %if.end19 ; CHECK-NEXT: movq %rax, %rbx ; CHECK-NEXT: 
xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB1_25 +; CHECK-NEXT: jne .LBB1_26 ; CHECK-NEXT: # %bb.4: # %if.end19 ; CHECK-NEXT: cmpq %r12, %rbx -; CHECK-NEXT: jb .LBB1_25 +; CHECK-NEXT: jb .LBB1_26 ; CHECK-NEXT: # %bb.5: # %if.end50 ; CHECK-NEXT: movq %rbx, %rdi ; CHECK-NEXT: movq %r15, %rdx ; CHECK-NEXT: callq memcpy ; CHECK-NEXT: cmpl $4, %r14d -; CHECK-NEXT: jb .LBB1_28 +; CHECK-NEXT: jb .LBB1_29 ; CHECK-NEXT: # %bb.6: # %shared_preheader ; CHECK-NEXT: movb $32, %dl ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: # implicit-def: $rcx +; CHECK-NEXT: jmp .LBB1_9 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB1_7: # %merge_predecessor_split +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 +; CHECK-NEXT: movb $32, %dl +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: .LBB1_8: # %outer_loop_latch +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 +; CHECK-NEXT: movzwl %si, %esi +; CHECK-NEXT: decl %esi +; CHECK-NEXT: movzwl %si, %esi +; CHECK-NEXT: leaq 1(%rcx,%rsi), %rcx +; CHECK-NEXT: .LBB1_9: # %outer_loop_header +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB1_10 Depth 2 ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: je .LBB1_18 +; CHECK-NEXT: je .LBB1_19 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB1_9: # %shared_loop_header -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB1_10: # %shared_loop_header +; CHECK-NEXT: # Parent Loop BB1_9 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: testq %rbx, %rbx -; CHECK-NEXT: jne .LBB1_27 -; CHECK-NEXT: # %bb.10: # %inner_loop_body -; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 +; CHECK-NEXT: jne .LBB1_28 +; CHECK-NEXT: # %bb.11: # %inner_loop_body +; CHECK-NEXT: # in Loop: Header=BB1_10 Depth=2 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jns .LBB1_9 -; CHECK-NEXT: # %bb.11: # %if.end96.i +; CHECK-NEXT: jns .LBB1_10 +; CHECK-NEXT: # %bb.12: # %if.end96.i ; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: cmpl $3, %ebp -; 
CHECK-NEXT: jae .LBB1_22 -; CHECK-NEXT: # %bb.12: # %if.end287.i +; CHECK-NEXT: jae .LBB1_23 +; CHECK-NEXT: # %bb.13: # %if.end287.i ; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: cmpl $1, %ebp ; CHECK-NEXT: setne %dl ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB1_16 -; CHECK-NEXT: # %bb.13: # %if.end308.i +; CHECK-NEXT: jne .LBB1_17 +; CHECK-NEXT: # %bb.14: # %if.end308.i ; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je .LBB1_7 -; CHECK-NEXT: # %bb.14: # %if.end335.i +; CHECK-NEXT: # %bb.15: # %if.end335.i ; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: movl $0, %esi ; CHECK-NEXT: jne .LBB1_8 -; CHECK-NEXT: # %bb.15: # %merge_other +; CHECK-NEXT: # %bb.16: # %merge_other ; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: jmp .LBB1_17 -; CHECK-NEXT: .LBB1_16: # in Loop: Header=BB1_9 Depth=1 +; CHECK-NEXT: jmp .LBB1_18 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB1_17: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: movb %dl, %sil ; CHECK-NEXT: addl $3, %esi -; CHECK-NEXT: .LBB1_17: # %outer_loop_latch +; CHECK-NEXT: .LBB1_18: # %outer_loop_latch ; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: jmp .LBB1_8 -; CHECK-NEXT: .LBB1_7: # %merge_predecessor_split -; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 -; CHECK-NEXT: movb $32, %dl -; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: .LBB1_8: # %outer_loop_latch -; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 -; CHECK-NEXT: movzwl %si, %esi -; CHECK-NEXT: decl %esi -; CHECK-NEXT: movzwl %si, %esi -; CHECK-NEXT: leaq 1(%rcx,%rsi), %rcx -; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: jne .LBB1_9 -; CHECK-NEXT: .LBB1_18: # %while.cond.us1412.i +; CHECK-NEXT: .LBB1_26: +; CHECK-NEXT: movl $1, %ebx +; CHECK-NEXT: jmp .LBB1_27 +; CHECK-NEXT: .LBB1_19: # %while.cond.us1412.i ; 
CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: movl $1, %ebx -; CHECK-NEXT: jne .LBB1_20 -; CHECK-NEXT: # %bb.19: # %while.cond.us1412.i +; CHECK-NEXT: jne .LBB1_21 +; CHECK-NEXT: # %bb.20: # %while.cond.us1412.i ; CHECK-NEXT: decb %dl -; CHECK-NEXT: jne .LBB1_26 -; CHECK-NEXT: .LBB1_20: # %if.end41.us1436.i -; CHECK-NEXT: .LBB1_25: -; CHECK-NEXT: movl $1, %ebx -; CHECK-NEXT: jmp .LBB1_26 -; CHECK-NEXT: .LBB1_22: # %if.then99.i +; CHECK-NEXT: jne .LBB1_27 +; CHECK-NEXT: .LBB1_21: # %if.end41.us1436.i +; CHECK-NEXT: .LBB1_23: # %if.then99.i ; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: movl $.str.6, %edi ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: callq cli_dbgmsg -; CHECK-NEXT: .LBB1_26: # %cleanup +; CHECK-NEXT: .LBB1_27: # %cleanup ; CHECK-NEXT: movl %ebx, %eax ; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: popq %rbx @@ -212,8 +218,8 @@ ; CHECK-NEXT: popq %r15 ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB1_27: # %wunpsect.exit.thread.loopexit389 -; CHECK-NEXT: .LBB1_28: # %wunpsect.exit.thread.loopexit391 +; CHECK-NEXT: .LBB1_28: # %wunpsect.exit.thread.loopexit389 +; CHECK-NEXT: .LBB1_29: # %wunpsect.exit.thread.loopexit391 entry: %0 = load i32, i32* undef, align 4 %mul = shl nsw i32 %0, 2 Index: test/CodeGen/X86/tail-dup-repeat.ll =================================================================== --- test/CodeGen/X86/tail-dup-repeat.ll +++ test/CodeGen/X86/tail-dup-repeat.ll @@ -10,28 +10,33 @@ define void @repeated_tail_dup(i1 %a1, i1 %a2, i32* %a4, i32* %a5, i8* %a6, i32 %a7) #0 align 2 { ; CHECK-LABEL: repeated_tail_dup: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: je .LBB0_3 +; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_2: # %land.lhs.true +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $10, (%rdx) ; CHECK-NEXT: .LBB0_6: # %dup2 +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $2, (%rcx) ; CHECK-NEXT: testl %r9d, %r9d ; 
CHECK-NEXT: jne .LBB0_8 ; CHECK-NEXT: .LBB0_1: # %for.cond +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: testb $1, %dil ; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: .LBB0_3: # %if.end56 +; CHECK-NEXT: # %bb.3: # %if.end56 +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb $1, %sil ; CHECK-NEXT: je .LBB0_5 ; CHECK-NEXT: # %bb.4: # %if.then64 +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movb $1, (%r8) ; CHECK-NEXT: testl %r9d, %r9d ; CHECK-NEXT: je .LBB0_1 ; CHECK-NEXT: jmp .LBB0_8 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_5: # %if.end70 +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $12, (%rdx) ; CHECK-NEXT: jmp .LBB0_6 ; CHECK-NEXT: .LBB0_8: # %for.end Index: test/CodeGen/X86/tail-merge-after-mbp.mir =================================================================== --- test/CodeGen/X86/tail-merge-after-mbp.mir +++ test/CodeGen/X86/tail-merge-after-mbp.mir @@ -18,30 +18,30 @@ ; CHECK: TEST64rr $rax, $rax, implicit-def $eflags ; CHECK: JCC_1 %bb.1, 4, implicit $eflags ; CHECK: bb.3: - ; CHECK: successors: %bb.4(0x30000000), %bb.8(0x50000000) + ; CHECK: successors: %bb.6(0x30000000), %bb.4(0x50000000) ; CHECK: CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0, implicit-def $eflags :: (load 8) - ; CHECK: JCC_1 %bb.8, 5, implicit $eflags + ; CHECK: JCC_1 %bb.6, 4, implicit $eflags ; CHECK: bb.4: - ; CHECK: successors: %bb.1(0x30000000), %bb.5(0x50000000) + ; CHECK: $ebp = XOR32rr undef $ebp, undef $ebp, implicit-def dead $eflags + ; CHECK: dead $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags, implicit-def $al + ; CHECK: RETQ $eax + ; CHECK: bb.6: + ; CHECK: successors: %bb.1(0x30000000), %bb.7(0x50000000) ; CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load 8) ; CHECK: TEST64rr $rax, $rax, implicit-def $eflags ; CHECK: JCC_1 %bb.1, 4, implicit $eflags - ; CHECK: bb.5 (align 4): - ; CHECK: successors: %bb.6(0x71555555), %bb.8(0x0eaaaaab) + ; CHECK: bb.7 (align 4): + ; CHECK: 
successors: %bb.8(0x71555555), %bb.4(0x0eaaaaab) ; CHECK: CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0, implicit-def $eflags :: (load 8), (load 8) - ; CHECK: JCC_1 %bb.8, 5, implicit $eflags - ; CHECK: bb.6: - ; CHECK: successors: %bb.1(0x04000000), %bb.5(0x7c000000) + ; CHECK: JCC_1 %bb.4, 5, implicit $eflags + ; CHECK: bb.8: + ; CHECK: successors: %bb.1(0x04000000), %bb.7(0x7c000000) ; CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load 8) ; CHECK: TEST64rr $rax, $rax, implicit-def $eflags - ; CHECK: JCC_1 %bb.5, 5, implicit $eflags + ; CHECK: JCC_1 %bb.7, 5, implicit $eflags ; CHECK: bb.1: ; CHECK: $ebp = XOR32rr undef $ebp, undef $ebp, implicit-def dead $eflags ; CHECK: RETQ $eax - ; CHECK: bb.8: - ; CHECK: $ebp = XOR32rr undef $ebp, undef $ebp, implicit-def dead $eflags - ; CHECK: dead $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags, implicit-def $al - ; CHECK: RETQ $eax bb.0: successors: %bb.1(0x40000000), %bb.7(0x40000000) Index: test/CodeGen/X86/tail-opts.ll =================================================================== --- test/CodeGen/X86/tail-opts.ll +++ test/CodeGen/X86/tail-opts.ll @@ -242,75 +242,68 @@ ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB3_17 +; CHECK-NEXT: jne .LBB3_9 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: movb 0, %bl ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB3_16 +; CHECK-NEXT: jne .LBB3_8 ; CHECK-NEXT: # %bb.2: # %bb.i ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je .LBB3_16 +; CHECK-NEXT: je .LBB3_8 ; CHECK-NEXT: # %bb.3: # %lvalue_p.exit ; CHECK-NEXT: movq 0, %rax ; CHECK-NEXT: movzbl (%rax), %ecx ; CHECK-NEXT: testl %ecx, %ecx -; CHECK-NEXT: je .LBB3_12 +; CHECK-NEXT: je .LBB3_10 ; CHECK-NEXT: # %bb.4: # %lvalue_p.exit ; CHECK-NEXT: cmpl $2, %ecx -; CHECK-NEXT: jne .LBB3_5 -; CHECK-NEXT: # %bb.6: # %bb.i1 +; CHECK-NEXT: jne .LBB3_15 +; CHECK-NEXT: # %bb.5: # %bb.i1 ; 
CHECK-NEXT: movq 32(%rax), %rax ; CHECK-NEXT: movzbl 16(%rax), %ecx ; CHECK-NEXT: testl %ecx, %ecx -; CHECK-NEXT: je .LBB3_10 -; CHECK-NEXT: # %bb.7: # %bb.i1 +; CHECK-NEXT: je .LBB3_13 +; CHECK-NEXT: # %bb.6: # %bb.i1 ; CHECK-NEXT: cmpl $2, %ecx -; CHECK-NEXT: jne .LBB3_8 -; CHECK-NEXT: # %bb.9: # %bb.i.i +; CHECK-NEXT: jne .LBB3_15 +; CHECK-NEXT: # %bb.7: # %bb.i.i ; CHECK-NEXT: xorl %edi, %edi ; CHECK-NEXT: callq lvalue_p ; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: setne %al -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je .LBB3_15 -; CHECK-NEXT: jmp .LBB3_17 -; CHECK-NEXT: .LBB3_16: # %bb1 +; CHECK-NEXT: jmp .LBB3_16 +; CHECK-NEXT: .LBB3_8: # %bb1 ; CHECK-NEXT: cmpb $23, %bl -; CHECK-NEXT: .LBB3_17: # %bb3 -; CHECK-NEXT: .LBB3_12: # %bb2.i3 +; CHECK-NEXT: .LBB3_9: # %bb3 +; CHECK-NEXT: .LBB3_15: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: .LBB3_16: # %lvalue_p.exit4 +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB3_9 +; CHECK-NEXT: # %bb.17: # %lvalue_p.exit4 +; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: .LBB3_10: # %bb2.i3 ; CHECK-NEXT: movq 8(%rax), %rax ; CHECK-NEXT: movb 16(%rax), %cl ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpb $23, %cl -; CHECK-NEXT: je .LBB3_14 -; CHECK-NEXT: # %bb.13: # %bb2.i3 +; CHECK-NEXT: je .LBB3_16 +; CHECK-NEXT: # %bb.11: # %bb2.i3 ; CHECK-NEXT: cmpb $16, %cl -; CHECK-NEXT: je .LBB3_14 -; CHECK-NEXT: jmp .LBB3_17 -; CHECK-NEXT: .LBB3_5: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je .LBB3_15 -; CHECK-NEXT: jmp .LBB3_17 -; CHECK-NEXT: .LBB3_10: # %bb2.i.i2 +; CHECK-NEXT: je .LBB3_16 +; CHECK-NEXT: jmp .LBB3_9 +; CHECK-NEXT: .LBB3_13: # %bb2.i.i2 ; CHECK-NEXT: movq 8(%rax), %rax ; CHECK-NEXT: movb 16(%rax), %cl ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpb $16, %cl -; CHECK-NEXT: je .LBB3_14 -; CHECK-NEXT: # %bb.11: # %bb2.i.i2 +; CHECK-NEXT: je .LBB3_16 +; CHECK-NEXT: # %bb.14: # %bb2.i.i2 ; CHECK-NEXT: cmpb $23, %cl -; CHECK-NEXT: je .LBB3_14 -; CHECK-NEXT: jmp .LBB3_17 -; CHECK-NEXT: 
.LBB3_8: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: .LBB3_14: # %lvalue_p.exit4 -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB3_17 -; CHECK-NEXT: .LBB3_15: # %lvalue_p.exit4 -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: je .LBB3_16 +; CHECK-NEXT: jmp .LBB3_9 entry: %tmp4 = load i8, i8* null, align 8 ; [#uses=3] switch i8 %tmp4, label %bb3 [