Convert the X86 tail-merge-after-mbp regression test from LLVM IR to MIR.

The removed .ll test ran llc on @foo and matched the emitted assembly. The
added .mir test feeds the same function, already lowered to MIR, through
-run-pass=block-placement only and matches the MIR opcodes of block_11,
block_14, block_18, block_21 and block_25. Both variants assert that these
blocks are still present as separate blocks in the output.

NOTE(review): line breaks and indentation below were restored from a
whitespace-collapsed copy of this diff. The hunk line counts (94 removed,
249 added) match, but exact column padding may differ from the original
files; if the removal hunk fails to apply, retry with `patch -l`.

Index: test/CodeGen/X86/tail-merge-after-mbp.ll
===================================================================
--- test/CodeGen/X86/tail-merge-after-mbp.ll
+++ test/CodeGen/X86/tail-merge-after-mbp.ll
@@ -1,94 +0,0 @@
-; RUN: llc -mtriple=x86_64-linux -o - %s | FileCheck %s
-
-%0 = type { %1, %3* }
-%1 = type { %2* }
-%2 = type { %2*, i8* }
-%3 = type { i32, i32 (i32, i32)* }
-
-
-declare i32 @Up(...)
-declare i32 @f(i32, i32)
-
-; check loop block_14 is not merged with block_21
-; check loop block_11 is not merged with block_18, block_25
-define i32 @foo(%0* nocapture readonly, i32, i1 %c, i8* %p1, %2** %p2) {
-; CHECK-LABEL: foo:
-; CHECK: # %block_11
-; CHECK-NEXT: movq (%r14), %rax
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: je
-; CHECK-NEXT:# %block_14
-; CHECK-NEXT: cmpq $0, 8(%rax)
-; CHECK-NEXT: jne
-; CHECK-NEXT:# %block_18
-; CHECK-NEXT: movq (%r14), %rax
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: je
-; CHECK-NEXT:# %block_21
-; CHECK-NEXT:# =>This Inner Loop Header
-; CHECK-NEXT: cmpq $0, 8(%rax)
-; CHECK-NEXT: jne
-; CHECK-NEXT:# %block_25
-; CHECK-NEXT:# in Loop
-; CHECK-NEXT: movq (%r14), %rax
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: jne
-  br i1 %c, label %block_34, label %block_3
-
-block_3:                                          ; preds = %2
-  br i1 %c, label %block_7, label %block_4
-
-block_4:                                          ; preds = %block_3
-  %a5 = tail call i32 @f(i32 undef, i32 undef)
-  %a6 = icmp eq i32 %a5, 0
-  br i1 %a6, label %block_7, label %block_34
-
-block_7:                                          ; preds = %block_4, %block_3
-  %a8 = icmp eq %2* null, null
-  br i1 %a8, label %block_34, label %block_9
-
-block_9:                                          ; preds = %block_7
-  %a10 = icmp eq i8* %p1, null
-  br i1 %a10, label %block_11, label %block_32
-
-block_11:                                         ; preds = %block_9
-  %a12 = load %2*, %2** %p2, align 8
-  %a13 = icmp eq %2* %a12, null
-  br i1 %a13, label %block_34, label %block_14
-
-block_14:                                         ; preds = %block_11
-  %a15 = getelementptr inbounds %2, %2* %a12, i64 0, i32 1
-  %a16 = load i8*, i8** %a15, align 8
-  %a17 = icmp eq i8* %a16, null
-  br i1 %a17, label %block_18, label %block_32
-
-block_18:                                         ; preds = %block_14
-  %a19 = load %2*, %2** %p2, align 8
-  %a20 = icmp eq %2* %a19, null
-  br i1 %a20, label %block_34, label %block_21
-
-block_21:                                         ; preds = %block_18
-  %a22 = getelementptr inbounds %2, %2* %a19, i64 0, i32 1
-  %a23 = load i8*, i8** %a22, align 8
-  %a24 = icmp eq i8* %a23, null
-  br i1 %a24, label %block_25, label %block_32
-
-block_25:                                         ; preds = %block_28, %block_21
-  %a26 = load %2*, %2** %p2, align 8
-  %a27 = icmp eq %2* %a26, null
-  br i1 %a27, label %block_34, label %block_28
-
-block_28:                                         ; preds = %block_25
-  %a29 = getelementptr inbounds %2, %2* %a26, i64 0, i32 1
-  %a30 = load i8*, i8** %a29, align 8
-  %a31 = icmp eq i8* %a30, null
-  br i1 %a31, label %block_25, label %block_32
-
-block_32:                                         ; preds = %block_28, %block_21, %block_14, %block_9
-  %a33 = tail call i32 (...) @Up()
-  br label %block_34
-
-block_34:                                         ; preds = %block_32, %block_25, %block_18, %block_11, %block_7, %block_4, %2
-  %a35 = phi i32 [ 0, %2 ], [ %a5, %block_4 ], [ 0, %block_7 ], [ 0, %block_11 ], [ 0, %block_32 ], [ 0, %block_18 ], [ 0, %block_25 ]
-  ret i32 %a35
-}
Index: test/CodeGen/X86/tail-merge-after-mbp.mir
===================================================================
--- test/CodeGen/X86/tail-merge-after-mbp.mir
+++ test/CodeGen/X86/tail-merge-after-mbp.mir
@@ -0,0 +1,249 @@
+# RUN: llc -mtriple=x86_64-linux -run-pass=block-placement -o - %s | FileCheck %s
+
+--- |
+  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+  target triple = "x86_64--linux"
+
+  %0 = type { %1, %3* }
+  %1 = type { %2* }
+  %2 = type { %2*, i8* }
+  %3 = type { i32, i32 (i32, i32)* }
+
+  declare i32 @Up(...)
+
+  declare i32 @f(i32, i32)
+
+  define i32 @foo(%0* nocapture readonly, i32, i1 %c, i8* %p1, %2** %p2) {
+    br i1 %c, label %block_34, label %block_3
+
+  block_3:                                          ; preds = %2
+    br i1 %c, label %block_7, label %block_4
+
+  block_4:                                          ; preds = %block_3
+    %a5 = tail call i32 @f(i32 undef, i32 undef)
+    %a6 = icmp eq i32 %a5, 0
+    br i1 %a6, label %block_7, label %block_34
+
+  block_7:                                          ; preds = %block_4, %block_3
+    %a8 = icmp eq %2* null, null
+    br i1 %a8, label %block_34, label %block_9
+
+  block_9:                                          ; preds = %block_7
+    %a10 = icmp eq i8* %p1, null
+    br i1 %a10, label %block_11, label %block_32
+
+  block_11:                                         ; preds = %block_9
+    %a12 = load %2*, %2** %p2, align 8
+    %a13 = icmp eq %2* %a12, null
+    br i1 %a13, label %block_34, label %block_14
+
+  block_14:                                         ; preds = %block_11
+    %a15 = getelementptr inbounds %2, %2* %a12, i64 0, i32 1
+    %a16 = load i8*, i8** %a15, align 8
+    %a17 = icmp eq i8* %a16, null
+    br i1 %a17, label %block_18, label %block_32
+
+  block_18:                                         ; preds = %block_14
+    %a19 = load %2*, %2** %p2, align 8
+    %a20 = icmp eq %2* %a19, null
+    br i1 %a20, label %block_34, label %block_21
+
+  block_21:                                         ; preds = %block_18
+    %a22 = getelementptr inbounds %2, %2* %a19, i64 0, i32 1
+    %a23 = load i8*, i8** %a22, align 8
+    %a24 = icmp eq i8* %a23, null
+    br i1 %a24, label %block_25.preheader, label %block_32
+
+  block_25.preheader:                               ; preds = %block_21
+    br label %block_25
+
+  block_25:                                         ; preds = %block_25.preheader, %block_28
+    %a26 = load %2*, %2** %p2, align 8
+    %a27 = icmp eq %2* %a26, null
+    br i1 %a27, label %block_34, label %block_28
+
+  block_28:                                         ; preds = %block_25
+    %a29 = getelementptr inbounds %2, %2* %a26, i64 0, i32 1
+    %a30 = load i8*, i8** %a29, align 8
+    %a31 = icmp eq i8* %a30, null
+    br i1 %a31, label %block_25, label %block_32
+
+  block_32:                                         ; preds = %block_28, %block_21, %block_14, %block_9
+    %a33 = tail call i32 (...) @Up()
+    br label %block_34
+
+  block_34:                                         ; preds = %block_25, %block_32, %block_18, %block_11, %block_7, %block_4, %2
+    %a35 = phi i32 [ 0, %2 ], [ %a5, %block_4 ], [ 0, %block_7 ], [ 0, %block_11 ], [ 0, %block_32 ], [ 0, %block_18 ], [ 0, %block_25 ]
+    ret i32 %a35
+  }
+
+  ; Function Attrs: nounwind
+  declare void @llvm.stackprotector(i8*, i8**) #0
+
+  attributes #0 = { nounwind }
+
+...
+---
+# CHECK: name: foo
+name:            foo
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '%edx' }
+  - { reg: '%rcx' }
+  - { reg: '%r8' }
+fixedStack:
+  - { id: 0, type: spill-slot, offset: -32, size: 8, alignment: 16, callee-saved-register: '%rbx' }
+  - { id: 1, type: spill-slot, offset: -24, size: 8, alignment: 8, callee-saved-register: '%r14' }
+  - { id: 2, type: spill-slot, offset: -16, size: 8, alignment: 16, callee-saved-register: '%rbp' }
+
+# check loop block_14 is not merged with block_21
+# check loop block_11 is not merged with block_18, block_25
+#
+# CHECK: bb.9.block_11:
+# CHECK: MOV64rm
+# CHECK-NEXT: TEST64rr
+# CHECK-NEXT: JE_1
+# CHECK: bb.10.block_14:
+# CHECK: CMP64mi8
+# CHECK-NEXT: JNE_1
+# CHECK: bb.11.block_18:
+# CHECK: MOV64rm
+# CHECK-NEXT: TEST64rr
+# CHECK-NEXT: JE_1
+# CHECK: bb.12.block_21:
+# CHECK: CMP64mi8
+# CHECK-NEXT: JNE_1
+# CHECK: bb.13.block_25:
+# CHECK: MOV64rm
+# CHECK-NEXT: TEST64rr
+# CHECK-NEXT: JNE_1
+
+body:             |
+  bb.0 (%ir-block.2):
+    successors: %bb.1(0x40000000), %bb.2.block_3(0x40000000)
+    liveins: %edx, %rcx, %r8, %rbp, %r14, %rbx
+
+    frame-setup PUSH64r killed %rbp, implicit-def %rsp, implicit %rsp
+    frame-setup PUSH64r killed %r14, implicit-def %rsp, implicit %rsp
+    frame-setup PUSH64r killed %rbx, implicit-def %rsp, implicit %rsp
+    %r14 = MOV64rr %r8
+    %rbx = MOV64rr %rcx
+    TEST8ri %dl, 1, implicit-def %eflags, implicit killed %edx
+    JE_1 %bb.2.block_3, implicit %eflags
+
+  bb.1:
+    successors: %bb.16.block_34(0x80000000)
+
+    %ebp = XOR32rr undef %ebp, undef %ebp, implicit-def dead %eflags
+    JMP_1 %bb.16.block_34
+
+  bb.2.block_3:
+    successors: %bb.4.block_7(0x40000000), %bb.3.block_4(0x40000000)
+    liveins: %eflags, %rbx, %r14
+
+    JNE_1 %bb.4.block_7, implicit %eflags
+
+  bb.3.block_4:
+    successors: %bb.4.block_7(0x30000000), %bb.16.block_34(0x50000000)
+    liveins: %rbx, %r14
+
+    CALL64pcrel32 @f, csr_64, implicit %rsp, implicit undef %edi, implicit undef %esi, implicit-def %rsp, implicit-def %eax
+    %ebp = MOV32rr %eax
+    TEST32rr %ebp, %ebp, implicit-def %eflags
+    JNE_1 %bb.16.block_34, implicit %eflags
+
+  bb.4.block_7:
+    successors: %bb.5(0x30000000), %bb.6.block_9(0x50000000)
+    liveins: %rbx, %r14
+
+    %al = MOV8ri 1
+    TEST8rr killed %al, %al, implicit-def %eflags
+    JE_1 %bb.6.block_9, implicit killed %eflags
+
+  bb.5:
+    successors: %bb.16.block_34(0x80000000)
+
+    %ebp = XOR32rr undef %ebp, undef %ebp, implicit-def dead %eflags
+    JMP_1 %bb.16.block_34
+
+  bb.6.block_9:
+    successors: %bb.7.block_11(0x30000000), %bb.15.block_32(0x50000000)
+    liveins: %rbx, %r14
+
+    TEST64rr killed %rbx, %rbx, implicit-def %eflags
+    JNE_1 %bb.15.block_32, implicit %eflags
+
+  bb.7.block_11:
+    successors: %bb.8(0x30000000), %bb.9.block_14(0x50000000)
+    liveins: %r14
+
+    %rax = MOV64rm %r14, 1, _, 0, _
+    TEST64rr %rax, %rax, implicit-def %eflags
+    JNE_1 %bb.9.block_14, implicit killed %eflags
+
+  bb.8:
+    successors: %bb.16.block_34(0x80000000)
+
+    %ebp = XOR32rr undef %ebp, undef %ebp, implicit-def dead %eflags
+    JMP_1 %bb.16.block_34
+
+  bb.9.block_14:
+    successors: %bb.10.block_18(0x30000000), %bb.15.block_32(0x50000000)
+    liveins: %rax, %r14
+
+    CMP64mi8 killed %rax, 1, _, 8, _, 0, implicit-def %eflags
+    JNE_1 %bb.15.block_32, implicit %eflags
+
+  bb.10.block_18:
+    successors: %bb.11(0x30000000), %bb.12.block_21(0x50000000)
+    liveins: %r14
+
+    %rax = MOV64rm %r14, 1, _, 0, _
+    TEST64rr %rax, %rax, implicit-def %eflags
+    JNE_1 %bb.12.block_21, implicit %eflags
+
+  bb.11:
+    successors: %bb.16.block_34(0x80000000)
+
+    %ebp = XOR32rr undef %ebp, undef %ebp, implicit-def dead %eflags
+    JMP_1 %bb.16.block_34
+
+  bb.12.block_21:
+    successors: %bb.13.block_25(0x71555555), %bb.15.block_32(0x0eaaaaab)
+    liveins: %r14, %rax
+
+    CMP64mi8 killed %rax, 1, _, 8, _, 0, implicit-def %eflags
+    JNE_1 %bb.15.block_32, implicit %eflags
+
+  bb.13.block_25:
+    successors: %bb.14(0x04000000), %bb.12.block_21(0x7c000000)
+    liveins: %r14
+
+    %rax = MOV64rm %r14, 1, _, 0, _
+    TEST64rr %rax, %rax, implicit-def %eflags
+    JNE_1 %bb.12.block_21, implicit %eflags
+
+  bb.14:
+    successors: %bb.16.block_34(0x80000000)
+
+    %ebp = XOR32rr undef %ebp, undef %ebp, implicit-def dead %eflags
+    JMP_1 %bb.16.block_34
+
+  bb.15.block_32:
+    successors: %bb.16.block_34(0x80000000)
+
+    %ebp = XOR32rr undef %ebp, undef %ebp, implicit-def dead %eflags
+    dead %eax = XOR32rr undef %eax, undef %eax, implicit-def dead %eflags, implicit-def %al
+    CALL64pcrel32 @Up, csr_64, implicit %rsp, implicit %al, implicit-def %rsp, implicit-def dead %eax
+
+  bb.16.block_34:
+    liveins: %ebp
+
+    %eax = MOV32rr killed %ebp
+    %rbx = POP64r implicit-def %rsp, implicit %rsp
+    %r14 = POP64r implicit-def %rsp, implicit %rsp
+    %rbp = POP64r implicit-def %rsp, implicit %rsp
+    RETQ %eax
+
+...