Index: lib/CodeGen/BranchFolding.cpp
===================================================================
--- lib/CodeGen/BranchFolding.cpp
+++ lib/CodeGen/BranchFolding.cpp
@@ -1070,31 +1070,28 @@
 bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
   bool MadeChange = false;
-  if (!EnableTailMerge) return MadeChange;
+  if (!EnableTailMerge)
+    return MadeChange;
 
   // First find blocks with no successors.
-  // Block placement does not create new tail merging opportunities for these
-  // blocks.
-  if (!AfterBlockPlacement) {
-    MergePotentials.clear();
-    for (MachineBasicBlock &MBB : MF) {
-      if (MergePotentials.size() == TailMergeThreshold)
-        break;
-      if (!TriedMerging.count(&MBB) && MBB.succ_empty())
-        MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
-    }
-
-    // If this is a large problem, avoid visiting the same basic blocks
-    // multiple times.
+  MergePotentials.clear();
+  for (MachineBasicBlock &MBB : MF) {
     if (MergePotentials.size() == TailMergeThreshold)
-      for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
-        TriedMerging.insert(MergePotentials[i].getBlock());
-
-    // See if we can do any tail merging on those.
-    if (MergePotentials.size() >= 2)
-      MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength);
+      break;
+    if (!TriedMerging.count(&MBB) && MBB.succ_empty())
+      MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
   }
 
+  // If this is a large problem, avoid visiting the same basic blocks
+  // multiple times.
+  if (MergePotentials.size() == TailMergeThreshold)
+    for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+      TriedMerging.insert(MergePotentials[i].getBlock());
+
+  // See if we can do any tail merging on those.
+  if (MergePotentials.size() >= 2)
+    MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength);
+
   // Look at blocks (IBB) with multiple predecessors (PBB).
   // We change each predecessor to a canonical form, by
   // (1) temporarily removing any unconditional branch from the predecessor
Index: test/CodeGen/Thumb2/v8_IT_3.ll
===================================================================
--- test/CodeGen/Thumb2/v8_IT_3.ll
+++ test/CodeGen/Thumb2/v8_IT_3.ll
@@ -61,9 +61,7 @@
 ; CHECK-PIC-NEXT: cmpeq
 ; CHECK-PIC-NEXT: beq
 ; CHECK-PIC: %bb6
-; CHECK-PIC-NEXT: movs
-; CHECK-PIC-NEXT: add
-; CHECK-PIC-NEXT: pop
+; CHECK-PIC: mov
 	ret i32 0
 
 bb6:
Index: test/CodeGen/WinEH/wineh-noret-cleanup.ll
===================================================================
--- test/CodeGen/WinEH/wineh-noret-cleanup.ll
+++ test/CodeGen/WinEH/wineh-noret-cleanup.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: sed -e s/.Cxx:// %s | llc -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefix=CXX
 ; RUN: sed -e s/.Seh:// %s | llc -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefix=SEH
@@ -68,13 +69,13 @@
 ; SEH-NEXT: .long .Ltmp0@IMGREL+1
 ; SEH-NEXT: .long .Ltmp1@IMGREL+1
 ; SEH-NEXT: .long dummy_filter@IMGREL
-; SEH-NEXT: .long .LBB0_5@IMGREL
+; SEH-NEXT: .long .LBB0_2@IMGREL
 ; SEH-NEXT: .long .Ltmp2@IMGREL+1
 ; SEH-NEXT: .long .Ltmp3@IMGREL+1
-; SEH-NEXT: .long "?dtor$2@?0?test@4HA"@IMGREL
+; SEH-NEXT: .long "?dtor$5@?0?test@4HA"@IMGREL
 ; SEH-NEXT: .long 0
 ; SEH-NEXT: .long .Ltmp2@IMGREL+1
 ; SEH-NEXT: .long .Ltmp3@IMGREL+1
 ; SEH-NEXT: .long dummy_filter@IMGREL
-; SEH-NEXT: .long .LBB0_5@IMGREL
+; SEH-NEXT: .long .LBB0_2@IMGREL
 ; SEH-NEXT: .Llsda_end0:
Index: test/CodeGen/X86/conditional-tailcall.ll
===================================================================
--- test/CodeGen/X86/conditional-tailcall.ll
+++ test/CodeGen/X86/conditional-tailcall.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-linux -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32
 ; RUN: llc < %s -mtriple=x86_64-linux -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64
 ; RUN: llc < %s -mtriple=x86_64-win32 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=WIN64
@@ -6,6 +7,39 @@
 declare void @bar()
 
 define void @f(i32 %x, i32 %y) optsize {
+; CHECK32-LABEL: f:
+; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; CHECK32-NEXT: cmpl {{[0-9]+}}(%esp), %eax # encoding: [0x3b,0x44,0x24,0x08]
+; CHECK32-NEXT: jne bar # TAILCALL
+; CHECK32-NEXT: # encoding: [0x75,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.1: # %bb1
+; CHECK32-NEXT: jmp foo # TAILCALL
+; CHECK32-NEXT: # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: f:
+; CHECK64: # %bb.0: # %entry
+; CHECK64-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7]
+; CHECK64-NEXT: jne bar # TAILCALL
+; CHECK64-NEXT: # encoding: [0x75,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.1: # %bb1
+; CHECK64-NEXT: jmp foo # TAILCALL
+; CHECK64-NEXT: # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+;
+; WIN64-LABEL: f:
+; WIN64: # %bb.0: # %entry
+; WIN64-NEXT: cmpl %edx, %ecx # encoding: [0x39,0xd1]
+; WIN64-NEXT: jne bar # TAILCALL
+; WIN64-NEXT: # encoding: [0x75,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.1: # %bb1
+; WIN64-NEXT: jmp foo # TAILCALL
+; WIN64-NEXT: # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
 entry:
 	%p = icmp eq i32 %x, %y
 	br i1 %p, label %bb1, label %bb2
@@ -16,15 +50,83 @@
 	tail call void @bar()
 	ret void
 
-; CHECK-LABEL: f:
-; CHECK: cmp
-; CHECK: jne bar
 ; Check that the asm doesn't just look good, but uses the correct encoding.
-; CHECK: encoding: [0x75,A]
-; CHECK: jmp foo
 }
 
 define void @f_non_leaf(i32 %x, i32 %y) optsize {
+; CHECK32-LABEL: f_non_leaf:
+; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: pushl %ebx # encoding: [0x53]
+; CHECK32-NEXT: .cfi_def_cfa_offset 8
+; CHECK32-NEXT: .cfi_offset %ebx, -8
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; CHECK32-NEXT: #APP
+; CHECK32-NEXT: #NO_APP
+; CHECK32-NEXT: cmpl {{[0-9]+}}(%esp), %eax # encoding: [0x3b,0x44,0x24,0x0c]
+; CHECK32-NEXT: jne .LBB1_2 # encoding: [0x75,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.1: # %bb1
+; CHECK32-NEXT: popl %ebx # encoding: [0x5b]
+; CHECK32-NEXT: .cfi_def_cfa_offset 4
+; CHECK32-NEXT: jmp foo # TAILCALL
+; CHECK32-NEXT: # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+; CHECK32-NEXT: .LBB1_2: # %bb2
+; CHECK32-NEXT: .cfi_def_cfa_offset 8
+; CHECK32-NEXT: popl %ebx # encoding: [0x5b]
+; CHECK32-NEXT: .cfi_def_cfa_offset 4
+; CHECK32-NEXT: jmp bar # TAILCALL
+; CHECK32-NEXT: # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: f_non_leaf:
+; CHECK64: # %bb.0: # %entry
+; CHECK64-NEXT: pushq %rbx # encoding: [0x53]
+; CHECK64-NEXT: .cfi_def_cfa_offset 16
+; CHECK64-NEXT: .cfi_offset %rbx, -16
+; CHECK64-NEXT: #APP
+; CHECK64-NEXT: #NO_APP
+; CHECK64-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7]
+; CHECK64-NEXT: jne .LBB1_2 # encoding: [0x75,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.1: # %bb1
+; CHECK64-NEXT: popq %rbx # encoding: [0x5b]
+; CHECK64-NEXT: .cfi_def_cfa_offset 8
+; CHECK64-NEXT: jmp foo # TAILCALL
+; CHECK64-NEXT: # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+; CHECK64-NEXT: .LBB1_2: # %bb2
+; CHECK64-NEXT: .cfi_def_cfa_offset 16
+; CHECK64-NEXT: popq %rbx # encoding: [0x5b]
+; CHECK64-NEXT: .cfi_def_cfa_offset 8
+; CHECK64-NEXT: jmp bar # TAILCALL
+; CHECK64-NEXT: # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+;
+; WIN64-LABEL: f_non_leaf:
+; WIN64: # %bb.0: # %entry
+; WIN64-NEXT: pushq %rbx # encoding: [0x53]
+; WIN64-NEXT: .seh_pushreg 3
+; WIN64-NEXT: .seh_endprologue
+; WIN64-NEXT: #APP
+; WIN64-NEXT: #NO_APP
+; WIN64-NEXT: cmpl %edx, %ecx # encoding: [0x39,0xd1]
+; WIN64-NEXT: jne .LBB1_2 # encoding: [0x75,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.1: # %bb1
+; WIN64-NEXT: popq %rbx # encoding: [0x5b]
+; WIN64-NEXT: jmp foo # TAILCALL
+; WIN64-NEXT: # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+; WIN64-NEXT: .LBB1_2: # %bb2
+; WIN64-NEXT: nop # encoding: [0x90]
+; WIN64-NEXT: popq %rbx # encoding: [0x5b]
+; WIN64-NEXT: jmp bar # TAILCALL
+; WIN64-NEXT: # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+; WIN64-NEXT: .seh_handlerdata
+; WIN64-NEXT: .text
+; WIN64-NEXT: .seh_endproc
 entry:
 	; Force %ebx to be spilled on the stack, turning this into
 	; not a "leaf" function for Win64.
@@ -39,16 +141,67 @@
 	tail call void @bar()
 	ret void
 
-; CHECK-LABEL: f_non_leaf:
-; WIN64-NOT: je foo
-; WIN64-NOT: jne bar
-; WIN64: jne
-; WIN64: jmp foo
-; WIN64: jmp bar
 }
 
 declare x86_thiscallcc zeroext i1 @baz(i8*, i32)
 define x86_thiscallcc zeroext i1 @BlockPlacementTest(i8* %this, i32 %x) optsize {
+; CHECK32-LABEL: BlockPlacementTest:
+; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
+; CHECK32-NEXT: testb $42, %dl # encoding: [0xf6,0xc2,0x2a]
+; CHECK32-NEXT: je .LBB2_3 # encoding: [0x74,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB2_3-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.1: # %land.rhs
+; CHECK32-NEXT: movb $1, %al # encoding: [0xb0,0x01]
+; CHECK32-NEXT: testb $44, %dl # encoding: [0xf6,0xc2,0x2c]
+; CHECK32-NEXT: je baz # TAILCALL
+; CHECK32-NEXT: # encoding: [0x74,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: baz-1, kind: FK_PCRel_1
+; CHECK32-NEXT: .LBB2_2: # %land.end
+; CHECK32-NEXT: # kill: def $al killed $al killed $eax
+; CHECK32-NEXT: retl $4 # encoding: [0xc2,0x04,0x00]
+; CHECK32-NEXT: .LBB2_3:
+; CHECK32-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK32-NEXT: jmp .LBB2_2 # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB2_2-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: BlockPlacementTest:
+; CHECK64: # %bb.0: # %entry
+; CHECK64-NEXT: testb $42, %sil # encoding: [0x40,0xf6,0xc6,0x2a]
+; CHECK64-NEXT: je .LBB2_3 # encoding: [0x74,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB2_3-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.1: # %land.rhs
+; CHECK64-NEXT: movb $1, %al # encoding: [0xb0,0x01]
+; CHECK64-NEXT: testb $44, %sil # encoding: [0x40,0xf6,0xc6,0x2c]
+; CHECK64-NEXT: je baz # TAILCALL
+; CHECK64-NEXT: # encoding: [0x74,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: baz-1, kind: FK_PCRel_1
+; CHECK64-NEXT: .LBB2_2: # %land.end
+; CHECK64-NEXT: # kill: def $al killed $al killed $eax
+; CHECK64-NEXT: retq # encoding: [0xc3]
+; CHECK64-NEXT: .LBB2_3:
+; CHECK64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK64-NEXT: jmp .LBB2_2 # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB2_2-1, kind: FK_PCRel_1
+;
+; WIN64-LABEL: BlockPlacementTest:
+; WIN64: # %bb.0: # %entry
+; WIN64-NEXT: testb $42, %dl # encoding: [0xf6,0xc2,0x2a]
+; WIN64-NEXT: je .LBB2_3 # encoding: [0x74,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB2_3-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.1: # %land.rhs
+; WIN64-NEXT: movb $1, %al # encoding: [0xb0,0x01]
+; WIN64-NEXT: testb $44, %dl # encoding: [0xf6,0xc2,0x2c]
+; WIN64-NEXT: je baz # TAILCALL
+; WIN64-NEXT: # encoding: [0x74,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: baz-1, kind: FK_PCRel_1
+; WIN64-NEXT: .LBB2_2: # %land.end
+; WIN64-NEXT: # kill: def $al killed $al killed $eax
+; WIN64-NEXT: retq # encoding: [0xc3]
+; WIN64-NEXT: .LBB2_3:
+; WIN64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; WIN64-NEXT: jmp .LBB2_2 # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB2_2-1, kind: FK_PCRel_1
 entry:
 	%and = and i32 %x, 42
 	%tobool = icmp eq i32 %and, 0
@@ -69,10 +222,6 @@
 ; Make sure machine block placement isn't confused by the conditional tail call,
 ; but sees that it can fall through to the next block.
-; CHECK-LABEL: BlockPlacementTest
-; CHECK: je baz
-; CHECK-NOT: xor
-; CHECK: ret
 }
 
@@ -82,7 +231,296 @@
 declare zeroext i1 @_Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_(i8*, i8*)
 
 define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly dereferenceable(8) %s) minsize {
-; CHECK-LABEL: pr31257
+; CHECK32-LABEL: pr31257:
+; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: pushl %ebp # encoding: [0x55]
+; CHECK32-NEXT: .cfi_def_cfa_offset 8
+; CHECK32-NEXT: pushl %ebx # encoding: [0x53]
+; CHECK32-NEXT: .cfi_def_cfa_offset 12
+; CHECK32-NEXT: pushl %edi # encoding: [0x57]
+; CHECK32-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-NEXT: pushl %esi # encoding: [0x56]
+; CHECK32-NEXT: .cfi_def_cfa_offset 20
+; CHECK32-NEXT: subl $12, %esp # encoding: [0x83,0xec,0x0c]
+; CHECK32-NEXT: .cfi_def_cfa_offset 32
+; CHECK32-NEXT: .cfi_offset %esi, -20
+; CHECK32-NEXT: .cfi_offset %edi, -16
+; CHECK32-NEXT: .cfi_offset %ebx, -12
+; CHECK32-NEXT: .cfi_offset %ebp, -8
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x20]
+; CHECK32-NEXT: movl (%eax), %eax # encoding: [0x8b,0x00]
+; CHECK32-NEXT: movl -24(%eax), %edx # encoding: [0x8b,0x50,0xe8]
+; CHECK32-NEXT: leal (%eax,%edx), %ebp # encoding: [0x8d,0x2c,0x10]
+; CHECK32-NEXT: xorl %ebx, %ebx # encoding: [0x31,0xdb]
+; CHECK32-NEXT: pushl $2 # encoding: [0x6a,0x02]
+; CHECK32-NEXT: .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT: popl %esi # encoding: [0x5e]
+; CHECK32-NEXT: .cfi_adjust_cfa_offset -4
+; CHECK32-NEXT: xorl %edi, %edi # encoding: [0x31,0xff]
+; CHECK32-NEXT: incl %edi # encoding: [0x47]
+; CHECK32-NEXT: jmp .LBB3_1 # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
+; CHECK32-NEXT: .LBB3_2: # %for.body
+; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT: cmpl $2, %ebx # encoding: [0x83,0xfb,0x02]
+; CHECK32-NEXT: je .LBB3_11 # encoding: [0x74,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.3: # %for.body
+; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT: cmpl $1, %ebx # encoding: [0x83,0xfb,0x01]
+; CHECK32-NEXT: je .LBB3_9 # encoding: [0x74,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.4: # %for.body
+; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT: testl %ebx, %ebx # encoding: [0x85,0xdb]
+; CHECK32-NEXT: jne .LBB3_10 # encoding: [0x75,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.5: # %sw.bb
+; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT: movzbl (%eax), %ecx # encoding: [0x0f,0xb6,0x08]
+; CHECK32-NEXT: cmpl $43, %ecx # encoding: [0x83,0xf9,0x2b]
+; CHECK32-NEXT: movl %edi, %ebx # encoding: [0x89,0xfb]
+; CHECK32-NEXT: je .LBB3_10 # encoding: [0x74,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.6: # %sw.bb
+; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT: cmpb $45, %cl # encoding: [0x80,0xf9,0x2d]
+; CHECK32-NEXT: movl %edi, %ebx # encoding: [0x89,0xfb]
+; CHECK32-NEXT: je .LBB3_10 # encoding: [0x74,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; CHECK32-NEXT: jmp .LBB3_7 # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1
+; CHECK32-NEXT: .LBB3_11: # %sw.bb22
+; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT: movzbl (%eax), %ecx # encoding: [0x0f,0xb6,0x08]
+; CHECK32-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0]
+; CHECK32-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a]
+; CHECK32-NEXT: movl %esi, %ebx # encoding: [0x89,0xf3]
+; CHECK32-NEXT: jb .LBB3_10 # encoding: [0x72,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; CHECK32-NEXT: jmp .LBB3_12 # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1
+; CHECK32-NEXT: .LBB3_9: # %sw.bb14
+; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT: movzbl (%eax), %ecx # encoding: [0x0f,0xb6,0x08]
+; CHECK32-NEXT: .LBB3_7: # %if.else
+; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0]
+; CHECK32-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a]
+; CHECK32-NEXT: movl %esi, %ebx # encoding: [0x89,0xf3]
+; CHECK32-NEXT: jae .LBB3_8 # encoding: [0x73,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
+; CHECK32-NEXT: .LBB3_10: # %for.inc
+; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT: incl %eax # encoding: [0x40]
+; CHECK32-NEXT: decl %edx # encoding: [0x4a]
+; CHECK32-NEXT: .LBB3_1: # %for.cond
+; CHECK32-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK32-NEXT: testl %edx, %edx # encoding: [0x85,0xd2]
+; CHECK32-NEXT: jne .LBB3_2 # encoding: [0x75,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_2-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.13:
+; CHECK32-NEXT: cmpl $2, %ebx # encoding: [0x83,0xfb,0x02]
+; CHECK32-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK32-NEXT: jmp .LBB3_14 # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_14-1, kind: FK_PCRel_1
+; CHECK32-NEXT: .LBB3_8:
+; CHECK32-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK32-NEXT: .LBB3_14: # %cleanup.thread
+; CHECK32-NEXT: # kill: def $al killed $al killed $eax
+; CHECK32-NEXT: addl $12, %esp # encoding: [0x83,0xc4,0x0c]
+; CHECK32-NEXT: .cfi_def_cfa_offset 20
+; CHECK32-NEXT: .LBB3_15: # %cleanup.thread
+; CHECK32-NEXT: popl %esi # encoding: [0x5e]
+; CHECK32-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-NEXT: popl %edi # encoding: [0x5f]
+; CHECK32-NEXT: .cfi_def_cfa_offset 12
+; CHECK32-NEXT: popl %ebx # encoding: [0x5b]
+; CHECK32-NEXT: .cfi_def_cfa_offset 8
+; CHECK32-NEXT: popl %ebp # encoding: [0x5d]
+; CHECK32-NEXT: .cfi_def_cfa_offset 4
+; CHECK32-NEXT: retl # encoding: [0xc3]
+; CHECK32-NEXT: .LBB3_12: # %if.else28
+; CHECK32-NEXT: .cfi_def_cfa_offset 32
+; CHECK32-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08]
+; CHECK32-NEXT: .cfi_adjust_cfa_offset 8
+; CHECK32-NEXT: pushl %ebp # encoding: [0x55]
+; CHECK32-NEXT: .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT: pushl %eax # encoding: [0x50]
+; CHECK32-NEXT: .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT: calll _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # encoding: [0xe8,A,A,A,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-4, kind: FK_PCRel_4
+; CHECK32-NEXT: addl $28, %esp # encoding: [0x83,0xc4,0x1c]
+; CHECK32-NEXT: .cfi_adjust_cfa_offset -28
+; CHECK32-NEXT: jmp .LBB3_15 # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_15-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: pr31257:
+; CHECK64: # %bb.0: # %entry
+; CHECK64-NEXT: movq (%rdi), %rdi # encoding: [0x48,0x8b,0x3f]
+; CHECK64-NEXT: movq -24(%rdi), %rax # encoding: [0x48,0x8b,0x47,0xe8]
+; CHECK64-NEXT: leaq (%rdi,%rax), %rsi # encoding: [0x48,0x8d,0x34,0x07]
+; CHECK64-NEXT: xorl %ecx, %ecx # encoding: [0x31,0xc9]
+; CHECK64-NEXT: pushq $2 # encoding: [0x6a,0x02]
+; CHECK64-NEXT: .cfi_adjust_cfa_offset 8
+; CHECK64-NEXT: popq %r9 # encoding: [0x41,0x59]
+; CHECK64-NEXT: .cfi_adjust_cfa_offset -8
+; CHECK64-NEXT: pushq $1 # encoding: [0x6a,0x01]
+; CHECK64-NEXT: .cfi_adjust_cfa_offset 8
+; CHECK64-NEXT: popq %r8 # encoding: [0x41,0x58]
+; CHECK64-NEXT: .cfi_adjust_cfa_offset -8
+; CHECK64-NEXT: jmp .LBB3_11 # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
+; CHECK64-NEXT: .LBB3_1: # %for.body
+; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT: cmpl $2, %ecx # encoding: [0x83,0xf9,0x02]
+; CHECK64-NEXT: je .LBB3_9 # encoding: [0x74,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.2: # %for.body
+; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT: cmpl $1, %ecx # encoding: [0x83,0xf9,0x01]
+; CHECK64-NEXT: je .LBB3_7 # encoding: [0x74,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.3: # %for.body
+; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9]
+; CHECK64-NEXT: jne .LBB3_10 # encoding: [0x75,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.4: # %sw.bb
+; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT: movzbl (%rdi), %edx # encoding: [0x0f,0xb6,0x17]
+; CHECK64-NEXT: cmpl $43, %edx # encoding: [0x83,0xfa,0x2b]
+; CHECK64-NEXT: movl %r8d, %ecx # encoding: [0x44,0x89,0xc1]
+; CHECK64-NEXT: je .LBB3_10 # encoding: [0x74,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.5: # %sw.bb
+; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT: cmpb $45, %dl # encoding: [0x80,0xfa,0x2d]
+; CHECK64-NEXT: movl %r8d, %ecx # encoding: [0x44,0x89,0xc1]
+; CHECK64-NEXT: je .LBB3_10 # encoding: [0x74,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.6: # %if.else
+; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT: addl $-48, %edx # encoding: [0x83,0xc2,0xd0]
+; CHECK64-NEXT: cmpl $10, %edx # encoding: [0x83,0xfa,0x0a]
+; CHECK64-NEXT: jmp .LBB3_8 # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
+; CHECK64-NEXT: .LBB3_7: # %sw.bb14
+; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT: movzbl (%rdi), %ecx # encoding: [0x0f,0xb6,0x0f]
+; CHECK64-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0]
+; CHECK64-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a]
+; CHECK64-NEXT: .LBB3_8: # %if.else
+; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT: movl %r9d, %ecx # encoding: [0x44,0x89,0xc9]
+; CHECK64-NEXT: jb .LBB3_10 # encoding: [0x72,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; CHECK64-NEXT: jmp .LBB3_13 # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_13-1, kind: FK_PCRel_1
+; CHECK64-NEXT: .LBB3_9: # %sw.bb22
+; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT: movzbl (%rdi), %ecx # encoding: [0x0f,0xb6,0x0f]
+; CHECK64-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0]
+; CHECK64-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a]
+; CHECK64-NEXT: movl %r9d, %ecx # encoding: [0x44,0x89,0xc9]
+; CHECK64-NEXT: jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # TAILCALL
+; CHECK64-NEXT: # encoding: [0x73,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-1, kind: FK_PCRel_1
+; CHECK64-NEXT: .LBB3_10: # %for.inc
+; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT: incq %rdi # encoding: [0x48,0xff,0xc7]
+; CHECK64-NEXT: decq %rax # encoding: [0x48,0xff,0xc8]
+; CHECK64-NEXT: .LBB3_11: # %for.cond
+; CHECK64-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK64-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0]
+; CHECK64-NEXT: jne .LBB3_1 # encoding: [0x75,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.12:
+; CHECK64-NEXT: cmpl $2, %ecx # encoding: [0x83,0xf9,0x02]
+; CHECK64-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK64-NEXT: # kill: def $al killed $al killed $eax
+; CHECK64-NEXT: retq # encoding: [0xc3]
+; CHECK64-NEXT: .LBB3_13:
+; CHECK64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK64-NEXT: # kill: def $al killed $al killed $eax
+; CHECK64-NEXT: retq # encoding: [0xc3]
+;
+; WIN64-LABEL: pr31257:
+; WIN64: # %bb.0: # %entry
+; WIN64-NEXT: movq (%rcx), %rcx # encoding: [0x48,0x8b,0x09]
+; WIN64-NEXT: movq -24(%rcx), %r8 # encoding: [0x4c,0x8b,0x41,0xe8]
+; WIN64-NEXT: leaq (%rcx,%r8), %rdx # encoding: [0x4a,0x8d,0x14,0x01]
+; WIN64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; WIN64-NEXT: jmp .LBB3_10 # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; WIN64-NEXT: .LBB3_1: # %for.body
+; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT: cmpl $2, %eax # encoding: [0x83,0xf8,0x02]
+; WIN64-NEXT: je .LBB3_8 # encoding: [0x74,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.2: # %for.body
+; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT: cmpl $1, %eax # encoding: [0x83,0xf8,0x01]
+; WIN64-NEXT: je .LBB3_6 # encoding: [0x74,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_6-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.3: # %for.body
+; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; WIN64-NEXT: jne .LBB3_9 # encoding: [0x75,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.4: # %sw.bb
+; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09]
+; WIN64-NEXT: cmpl $43, %r9d # encoding: [0x41,0x83,0xf9,0x2b]
+; WIN64-NEXT: movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00]
+; WIN64-NEXT: je .LBB3_9 # encoding: [0x74,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.5: # %sw.bb
+; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT: cmpb $45, %r9b # encoding: [0x41,0x80,0xf9,0x2d]
+; WIN64-NEXT: je .LBB3_9 # encoding: [0x74,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
+; WIN64-NEXT: jmp .LBB3_7 # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1
+; WIN64-NEXT: .LBB3_6: # %sw.bb14
+; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09]
+; WIN64-NEXT: .LBB3_7: # %if.else
+; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT: addl $-48, %r9d # encoding: [0x41,0x83,0xc1,0xd0]
+; WIN64-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
+; WIN64-NEXT: cmpl $10, %r9d # encoding: [0x41,0x83,0xf9,0x0a]
+; WIN64-NEXT: jb .LBB3_9 # encoding: [0x72,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
+; WIN64-NEXT: jmp .LBB3_12 # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1
+; WIN64-NEXT: .LBB3_8: # %sw.bb22
+; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09]
+; WIN64-NEXT: addl $-48, %r9d # encoding: [0x41,0x83,0xc1,0xd0]
+; WIN64-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
+; WIN64-NEXT: cmpl $10, %r9d # encoding: [0x41,0x83,0xf9,0x0a]
+; WIN64-NEXT: jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # TAILCALL
+; WIN64-NEXT: # encoding: [0x73,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-1, kind: FK_PCRel_1
+; WIN64-NEXT: .LBB3_9: # %for.inc
+; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT: incq %rcx # encoding: [0x48,0xff,0xc1]
+; WIN64-NEXT: decq %r8 # encoding: [0x49,0xff,0xc8]
+; WIN64-NEXT: .LBB3_10: # %for.cond
+; WIN64-NEXT: # =>This Inner Loop Header: Depth=1
+; WIN64-NEXT: testq %r8, %r8 # encoding: [0x4d,0x85,0xc0]
+; WIN64-NEXT: jne .LBB3_1 # encoding: [0x75,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.11:
+; WIN64-NEXT: cmpl $2, %eax # encoding: [0x83,0xf8,0x02]
+; WIN64-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; WIN64-NEXT: # kill: def $al killed $al killed $eax
+; WIN64-NEXT: retq # encoding: [0xc3]
+; WIN64-NEXT: .LBB3_12:
+; WIN64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; WIN64-NEXT: # kill: def $al killed $al killed $eax
+; WIN64-NEXT: retq # encoding: [0xc3]
 entry:
   %_M_p.i.i = getelementptr inbounds %"class.std::basic_string", %"class.std::basic_string"* %s, i64 0, i32 0, i32 0
   %0 = load i8*, i8** %_M_p.i.i, align 8
@@ -134,12 +572,6 @@
 ; Make sure Machine Copy Propagation doesn't delete the mov to %ecx because it
 ; thinks the conditional tail call clobbers it.
-; CHECK64-LABEL: .LBB3_11:
-; CHECK64: movzbl (%rdi), %ecx
-; CHECK64-NEXT: addl $-48, %ecx
-; CHECK64-NEXT: cmpl $10, %ecx
-; CHECK64-NEXT: movl %r9d, %ecx
-; CHECK64-NEXT: jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEE
 
 if.else28: ; preds = %sw.bb22
   %call34 = tail call zeroext i1 @_Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_(i8* nonnull %it.sroa.0.0, i8* %add.ptr.i56)
Index: test/CodeGen/X86/loop-search.ll
===================================================================
--- test/CodeGen/X86/loop-search.ll
+++ test/CodeGen/X86/loop-search.ll
@@ -8,26 +8,21 @@
 ; CHECK-LABEL: search:
 ; CHECK: ## %bb.0: ## %entry
 ; CHECK-NEXT: testl %edx, %edx
-; CHECK-NEXT: jle LBB0_1
-; CHECK-NEXT: ## %bb.4: ## %for.body.preheader
+; CHECK-NEXT: jle LBB0_5
+; CHECK-NEXT: ## %bb.1: ## %for.body.preheader
 ; CHECK-NEXT: movslq %edx, %rax
 ; CHECK-NEXT: xorl %ecx, %ecx
 ; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: LBB0_5: ## %for.body
+; CHECK-NEXT: LBB0_2: ## %for.body
 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: cmpl %edi, (%rsi,%rcx,4)
 ; CHECK-NEXT: je LBB0_6
-; CHECK-NEXT: ## %bb.2: ## %for.cond
-; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
+; CHECK-NEXT: ## %bb.3: ## %for.cond
+; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1
 ; CHECK-NEXT: incq %rcx
 ; CHECK-NEXT: cmpq %rax, %rcx
-; CHECK-NEXT: jl LBB0_5
-; ### FIXME: %bb.3 and LBB0_1 should be merged
-; CHECK-NEXT: ## %bb.3:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: ## kill: def $al killed $al killed $eax
-; CHECK-NEXT: retq
-; CHECK-NEXT: LBB0_1:
+; CHECK-NEXT: jl LBB0_2
+; CHECK-NEXT: LBB0_5:
 ; CHECK-NEXT: xorl %eax, %eax
 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT: retq
@@ -35,6 +30,7 @@
 ; CHECK-NEXT: movb $1, %al
 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT: retq
+; ### FIXME: %bb.3 and LBB0_1 should be merged
 entry:
   %cmp5 = icmp sgt i32 %count, 0
   br i1 %cmp5, label %for.body.preheader, label %cleanup
Index: test/CodeGen/X86/machine-cp.ll
===================================================================
--- test/CodeGen/X86/machine-cp.ll
+++ test/CodeGen/X86/machine-cp.ll
@@ -8,22 +8,21 @@
 ; CHECK: ## %bb.0: ## %entry
 ; CHECK-NEXT: movl %edi, %eax
 ; CHECK-NEXT: testl %esi, %esi
-; CHECK-NEXT: je LBB0_1
-; CHECK-NEXT: ## %bb.2: ## %while.body.preheader
+; CHECK-NEXT: je LBB0_4
+; CHECK-NEXT: ## %bb.1: ## %while.body.preheader
 ; CHECK-NEXT: movl %esi, %edx
 ; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: LBB0_3: ## %while.body
+; CHECK-NEXT: LBB0_2: ## %while.body
 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: movl %edx, %ecx
 ; CHECK-NEXT: cltd
 ; CHECK-NEXT: idivl %ecx
 ; CHECK-NEXT: testl %edx, %edx
 ; CHECK-NEXT: movl %ecx, %eax
-; CHECK-NEXT: jne LBB0_3
-; CHECK-NEXT: ## %bb.4: ## %while.end
+; CHECK-NEXT: jne LBB0_2
+; CHECK-NEXT: ## %bb.3: ## %while.end
 ; CHECK-NEXT: movl %ecx, %eax
-; CHECK-NEXT: retq
-; CHECK-NEXT: LBB0_1:
+; CHECK-NEXT: LBB0_4:
 ; CHECK-NEXT: retq
 entry:
   %cmp1 = icmp eq i32 %b, 0
@@ -60,22 +59,21 @@
 ; CHECK: ## %bb.0: ## %entry
 ; CHECK-NEXT: movq %rdi, %rax
 ; CHECK-NEXT: testq %rsi, %rsi
-; CHECK-NEXT: je LBB2_1
-; CHECK-NEXT: ## %bb.2: ## %while.body.preheader
+; CHECK-NEXT: je LBB2_4
+; CHECK-NEXT: ## %bb.1: ## %while.body.preheader
 ; CHECK-NEXT: movq %rsi, %rdx
 ; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: LBB2_3: ## %while.body
+; CHECK-NEXT: LBB2_2: ## %while.body
 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: movq %rdx, %rcx
 ; CHECK-NEXT: cqto
 ; CHECK-NEXT: idivq %rcx
 ; CHECK-NEXT: testq %rdx, %rdx
 ; CHECK-NEXT: movq %rcx, %rax
-; CHECK-NEXT: jne LBB2_3
-; CHECK-NEXT: ## %bb.4: ## %while.end
+; CHECK-NEXT: jne LBB2_2
+; CHECK-NEXT: ## %bb.3: ## %while.end
 ; CHECK-NEXT: movl %ecx, %eax
-; CHECK-NEXT: retq
-; CHECK-NEXT: LBB2_1:
+; CHECK-NEXT: LBB2_4:
 ; CHECK-NEXT: retq
 entry:
   %cmp1 = icmp eq i64 %b, 0
Index: test/CodeGen/X86/mul-constant-result.ll
===================================================================
--- test/CodeGen/X86/mul-constant-result.ll
+++ test/CodeGen/X86/mul-constant-result.ll
@@ -28,7 +28,7 @@
 ; X86-NEXT: .LBB0_4:
 ; X86-NEXT: decl %ecx
 ; X86-NEXT: cmpl $31, %ecx
-; X86-NEXT: ja .LBB0_39
+; X86-NEXT: ja .LBB0_7
 ; X86-NEXT: # %bb.5:
 ; X86-NEXT: jmpl *.LJTI0_0(,%ecx,4)
 ; X86-NEXT: .LBB0_6:
@@ -36,209 +36,149 @@
 ; X86-NEXT: popl %esi
 ; X86-NEXT: .cfi_def_cfa_offset 4
 ; X86-NEXT: retl
-; X86-NEXT: .LBB0_39:
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: .LBB0_40:
-; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
 ; X86-NEXT: .LBB0_7:
 ; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: leal (%eax,%eax,2), %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
+; X86-NEXT: xorl %eax, %eax
 ; X86-NEXT: .LBB0_8:
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: shll $2, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
-; X86-NEXT: .LBB0_9:
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: leal (%eax,%eax,4), %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: .cfi_def_cfa_offset 4
 ; X86-NEXT: retl
 ; X86-NEXT: .LBB0_10:
 ; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: addl %eax, %eax
-; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: shll $2, %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: .cfi_def_cfa_offset 4
 ; X86-NEXT: retl
-; X86-NEXT: .LBB0_11:
+; X86-NEXT: .LBB0_12:
 ; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: leal (,%eax,8), %ecx
-; X86-NEXT: jmp .LBB0_12
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: jmp .LBB0_9
 ; X86-NEXT: .LBB0_13:
-; X86-NEXT: shll $3, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
+; X86-NEXT: leal (,%eax,8), %ecx
+; X86-NEXT: jmp .LBB0_41
 ; X86-NEXT: .LBB0_14:
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: leal (%eax,%eax,8), %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
-; X86-NEXT: .LBB0_15:
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: addl %eax, %eax
-; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: shll $3, %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: .cfi_def_cfa_offset 4
 ; X86-NEXT: retl
 ; X86-NEXT: .LBB0_16:
 ; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: leal (%eax,%eax,4), %ecx
-; X86-NEXT: leal (%eax,%ecx,2), %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: jmp .LBB0_11
 ; X86-NEXT: .LBB0_17:
-; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: jmp .LBB0_18
+; X86-NEXT: .LBB0_19:
 ; X86-NEXT: shll $2, %eax
-; X86-NEXT: leal (%eax,%eax,2), %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
-; X86-NEXT: .LBB0_18:
-; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: jmp .LBB0_9
+; X86-NEXT: .LBB0_20:
 ; X86-NEXT: leal (%eax,%eax,2), %ecx
-; X86-NEXT: leal (%eax,%ecx,4), %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
-; X86-NEXT: .LBB0_19:
-; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: jmp .LBB0_21
+; X86-NEXT: .LBB0_22:
 ; X86-NEXT: movl %eax, %ecx
 ; X86-NEXT: shll $4, %ecx
 ; X86-NEXT: subl %eax, %ecx
-; X86-NEXT: jmp .LBB0_12
-; X86-NEXT: .LBB0_21:
+; X86-NEXT: jmp .LBB0_41
+; X86-NEXT: .LBB0_23:
 ; X86-NEXT: leal (%eax,%eax,4), %eax
-; X86-NEXT: leal (%eax,%eax,2), %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
-; X86-NEXT: .LBB0_22:
-; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: jmp .LBB0_9
+; X86-NEXT: .LBB0_24:
 ; X86-NEXT: shll $4, %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: .cfi_def_cfa_offset 4
 ; X86-NEXT: retl
-; X86-NEXT: .LBB0_23:
+; X86-NEXT: .LBB0_25:
 ; X86-NEXT: .cfi_def_cfa_offset 8
 ; X86-NEXT: movl %eax, %ecx
 ; X86-NEXT: shll $4, %ecx
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
-; X86-NEXT: .LBB0_24:
-; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: jmp .LBB0_26
+; X86-NEXT: .LBB0_27:
 ; X86-NEXT: addl %eax, %eax
+; X86-NEXT: .LBB0_15:
 ; X86-NEXT: leal (%eax,%eax,8), %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: .cfi_def_cfa_offset 4
 ; X86-NEXT: retl
-; X86-NEXT: .LBB0_25:
+; X86-NEXT: .LBB0_28:
 ; X86-NEXT: .cfi_def_cfa_offset 8
 ; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: .LBB0_18:
 ; X86-NEXT: leal (%eax,%ecx,2), %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: .cfi_def_cfa_offset 4
 ; X86-NEXT: retl
-; X86-NEXT: .LBB0_26:
+; X86-NEXT: .LBB0_29:
 ; X86-NEXT: .cfi_def_cfa_offset 8
 ; X86-NEXT: shll $2, %eax
-; X86-NEXT: leal (%eax,%eax,4), %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
-; X86-NEXT: .LBB0_27:
-; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: jmp .LBB0_11
+; X86-NEXT: .LBB0_30:
 ; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: .LBB0_21:
 ; X86-NEXT: leal (%eax,%ecx,4), %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: .cfi_def_cfa_offset 4
 ; X86-NEXT: retl
-; X86-NEXT: .LBB0_28:
+; X86-NEXT: .LBB0_31:
 ; X86-NEXT: .cfi_def_cfa_offset 8
 ; X86-NEXT: leal (%eax,%eax,4), %ecx
 ; X86-NEXT: leal (%eax,%ecx,4), %ecx
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
-; X86-NEXT: .LBB0_29:
-; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: jmp .LBB0_26
+; X86-NEXT: .LBB0_32:
 ; X86-NEXT: leal (%eax,%eax,2), %ecx
 ; X86-NEXT: shll $3, %ecx
-; X86-NEXT: jmp .LBB0_12
-; X86-NEXT: .LBB0_30:
+; X86-NEXT: jmp .LBB0_41
+; X86-NEXT: .LBB0_33:
 ; X86-NEXT: shll $3, %eax
-; X86-NEXT: leal (%eax,%eax,2), %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
-; X86-NEXT: .LBB0_31:
-; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: jmp .LBB0_9
+; X86-NEXT: .LBB0_34:
 ; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: .LBB0_11:
 ; X86-NEXT: leal (%eax,%eax,4), %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: .cfi_def_cfa_offset 4
 ; X86-NEXT: retl
-; X86-NEXT: .LBB0_32:
+; X86-NEXT: .LBB0_35:
 ; X86-NEXT: .cfi_def_cfa_offset 8
 ; X86-NEXT: leal (%eax,%eax,4), %ecx
 ; X86-NEXT: leal (%ecx,%ecx,4), %ecx
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
-; X86-NEXT: .LBB0_33:
-; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: jmp .LBB0_26
+; X86-NEXT: .LBB0_36:
 ; X86-NEXT: leal (%eax,%eax,8), %eax
+; X86-NEXT: .LBB0_9:
 ; X86-NEXT: leal (%eax,%eax,2), %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: .cfi_def_cfa_offset 4
 ; X86-NEXT: retl
-; X86-NEXT: .LBB0_34:
+; X86-NEXT: .LBB0_37:
 ; X86-NEXT: .cfi_def_cfa_offset 8
 ; X86-NEXT: leal (%eax,%eax,8), %ecx
 ; X86-NEXT: leal (%ecx,%ecx,2), %ecx
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 4
-; X86-NEXT: retl
-; X86-NEXT: .LBB0_35:
-; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: jmp .LBB0_26
+; X86-NEXT: .LBB0_38:
 ; X86-NEXT: leal (%eax,%eax,8), %ecx
 ; X86-NEXT: leal (%ecx,%ecx,2), %ecx
 ; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: .LBB0_26:
 ; X86-NEXT: addl %ecx, %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: .cfi_def_cfa_offset 4
 ; X86-NEXT: retl
-; X86-NEXT: .LBB0_36:
+; X86-NEXT: .LBB0_39:
 ; X86-NEXT: .cfi_def_cfa_offset 8
 ; X86-NEXT: movl %eax, %ecx
 ; X86-NEXT: shll $5, %ecx
 ; X86-NEXT: subl %eax, %ecx
-; X86-NEXT: jmp .LBB0_12
-; X86-NEXT: .LBB0_37:
+; X86-NEXT: jmp .LBB0_41
+; X86-NEXT: .LBB0_40:
 ; X86-NEXT: movl %eax, %ecx
 ; X86-NEXT: shll $5, %ecx
-; X86-NEXT: .LBB0_12:
+; X86-NEXT: .LBB0_41:
 ; X86-NEXT: subl %eax, %ecx
 ; X86-NEXT: movl %ecx, %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: .cfi_def_cfa_offset 4
 ; X86-NEXT: retl
-; X86-NEXT: .LBB0_38:
+; X86-NEXT: .LBB0_42:
 ; X86-NEXT: .cfi_def_cfa_offset 8
 ; X86-NEXT: shll $5, %eax
 ; X86-NEXT: popl %esi
@@ -256,158 +196,149 @@
 ; X64-HSW-NEXT: cmovel %ecx, %eax
 ; X64-HSW-NEXT: decl %edi
 ; X64-HSW-NEXT: cmpl $31, %edi
-; X64-HSW-NEXT: ja .LBB0_36
+; X64-HSW-NEXT: ja .LBB0_3
 ; X64-HSW-NEXT: # %bb.1:
 ; X64-HSW-NEXT: jmpq *.LJTI0_0(,%rdi,8)
 ; X64-HSW-NEXT: .LBB0_2:
 ; X64-HSW-NEXT: addl %eax, %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_36:
-; X64-HSW-NEXT: xorl %eax, %eax
-; X64-HSW-NEXT: .LBB0_37:
-; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT: retq
 ; X64-HSW-NEXT: .LBB0_3:
-; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
-; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: xorl %eax, %eax
 ; X64-HSW-NEXT: .LBB0_4:
-; X64-HSW-NEXT: shll $2, %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_5:
-; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: .LBB0_6:
+; X64-HSW-NEXT: shll $2, %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_6:
+; X64-HSW-NEXT: .LBB0_8:
 ; X64-HSW-NEXT: addl %eax, %eax
+; X64-HSW-NEXT: .LBB0_5:
 ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_7:
-; X64-HSW-NEXT: leal (,%rax,8), %ecx
-; X64-HSW-NEXT: jmp .LBB0_8
 ; X64-HSW-NEXT: .LBB0_9:
-; X64-HSW-NEXT: shll $3, %eax
-; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: leal (,%rax,8), %ecx
+; X64-HSW-NEXT: jmp .LBB0_37
 ; X64-HSW-NEXT: .LBB0_10:
-; X64-HSW-NEXT: leal (%rax,%rax,8), %eax
+; X64-HSW-NEXT: shll $3, %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_11:
+; X64-HSW-NEXT: .LBB0_12:
 ; X64-HSW-NEXT: addl %eax, %eax
+; X64-HSW-NEXT: .LBB0_7:
 ; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_12:
+; X64-HSW-NEXT: .LBB0_13:
 ; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx
 ; X64-HSW-NEXT: leal (%rax,%rcx,2), %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_13:
+; X64-HSW-NEXT: .LBB0_15:
 ; X64-HSW-NEXT: shll $2, %eax
 ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_14:
+; X64-HSW-NEXT: .LBB0_16:
 ; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx
 ; X64-HSW-NEXT: leal (%rax,%rcx,4), %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_15:
+; X64-HSW-NEXT: .LBB0_18:
 ; X64-HSW-NEXT: movl %eax, %ecx
 ; X64-HSW-NEXT: shll $4, %ecx
 ; X64-HSW-NEXT: subl %eax, %ecx
-; X64-HSW-NEXT: jmp .LBB0_8
-; X64-HSW-NEXT: .LBB0_17:
+; X64-HSW-NEXT: jmp .LBB0_37
+; X64-HSW-NEXT: .LBB0_19:
 ; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
 ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_18:
+; X64-HSW-NEXT: .LBB0_20:
 ; X64-HSW-NEXT: shll $4, %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_19:
+; X64-HSW-NEXT: .LBB0_21:
 ; X64-HSW-NEXT: movl %eax, %ecx
 ; X64-HSW-NEXT: shll $4, %ecx
-; X64-HSW-NEXT: jmp .LBB0_20
-; X64-HSW-NEXT: .LBB0_21:
+; X64-HSW-NEXT: jmp .LBB0_34
+; X64-HSW-NEXT: .LBB0_22:
 ; X64-HSW-NEXT: addl %eax, %eax
+; X64-HSW-NEXT: .LBB0_11:
 ; X64-HSW-NEXT: leal (%rax,%rax,8), %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_22:
+; X64-HSW-NEXT: .LBB0_23:
 ; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
 ; X64-HSW-NEXT: leal (%rax,%rcx,2), %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_23:
+; X64-HSW-NEXT: .LBB0_24:
 ; X64-HSW-NEXT: shll $2, %eax
 ; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_24:
+; X64-HSW-NEXT: .LBB0_25:
 ; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx
 ; X64-HSW-NEXT: leal (%rax,%rcx,4), %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_25:
+; X64-HSW-NEXT: .LBB0_26:
 ; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx
 ; X64-HSW-NEXT: leal (%rax,%rcx,4), %ecx
-; X64-HSW-NEXT: jmp .LBB0_20
-; X64-HSW-NEXT: .LBB0_26:
+; X64-HSW-NEXT: jmp .LBB0_34
+; X64-HSW-NEXT: .LBB0_27:
 ; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx
 ; X64-HSW-NEXT: shll $3, %ecx
-; X64-HSW-NEXT: jmp .LBB0_8
-; X64-HSW-NEXT: .LBB0_27:
+; X64-HSW-NEXT: jmp .LBB0_37
+; X64-HSW-NEXT: .LBB0_28:
 ; X64-HSW-NEXT: shll $3, %eax
 ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_28:
+; X64-HSW-NEXT: .LBB0_29:
 ; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
 ; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_29:
+; X64-HSW-NEXT: .LBB0_30:
 ; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx
 ; X64-HSW-NEXT: leal (%rcx,%rcx,4), %ecx
-; X64-HSW-NEXT: jmp .LBB0_20
-; X64-HSW-NEXT: .LBB0_30:
+; X64-HSW-NEXT: jmp .LBB0_34
+; X64-HSW-NEXT: .LBB0_31:
 ; X64-HSW-NEXT: leal (%rax,%rax,8), %eax
 ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_31:
+; X64-HSW-NEXT: .LBB0_32:
 ; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
 ; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx
-; X64-HSW-NEXT: jmp .LBB0_20
-; X64-HSW-NEXT: .LBB0_32:
+; X64-HSW-NEXT: jmp .LBB0_34
+; X64-HSW-NEXT: .LBB0_33:
 ; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
 ; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx
 ; X64-HSW-NEXT: addl %eax, %ecx
-; X64-HSW-NEXT: .LBB0_20:
+; X64-HSW-NEXT: .LBB0_34:
 ; X64-HSW-NEXT: addl %eax, %ecx
 ; X64-HSW-NEXT: movl %ecx, %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_33:
+; X64-HSW-NEXT: .LBB0_35:
 ; X64-HSW-NEXT: movl %eax, %ecx
 ; X64-HSW-NEXT: shll $5, %ecx
 ; X64-HSW-NEXT: subl %eax, %ecx
-; X64-HSW-NEXT: jmp .LBB0_8
-; X64-HSW-NEXT: .LBB0_34:
+; X64-HSW-NEXT: jmp .LBB0_37
+; X64-HSW-NEXT: .LBB0_36:
 ; X64-HSW-NEXT: movl %eax, %ecx
 ; X64-HSW-NEXT: shll $5, %ecx
-; X64-HSW-NEXT: .LBB0_8:
+; X64-HSW-NEXT: .LBB0_37:
 ; X64-HSW-NEXT: subl %eax, %ecx
 ; X64-HSW-NEXT: movl %ecx, %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
-; X64-HSW-NEXT: .LBB0_35:
+; X64-HSW-NEXT: .LBB0_39:
 ; X64-HSW-NEXT: shll $5, %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
Index: test/CodeGen/X86/tail-merge-after-mbp.mir
===================================================================
--- test/CodeGen/X86/tail-merge-after-mbp.mir
+++ test/CodeGen/X86/tail-merge-after-mbp.mir
@@ -1,34 +1,47 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=x86_64-linux -run-pass=block-placement -o - %s | FileCheck %s
 ---
 # check loop bb.7 is not merged with bb.10, bb.13
 # check loop bb.9 is not merged with bb.12
-# CHECK: bb.2:
-# CHECK-NEXT: successors: %bb.3(0x30000000), %bb.4(0x50000000)
-# CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg
-# CHECK-NEXT: TEST64rr $rax, $rax
-# CHECK-NEXT: JCC_1 %bb.3, 4
-# CHECK: bb.4:
-# CHECK-NEXT: successors: %bb.5(0x30000000), %bb.10(0x50000000)
-# CHECK: CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0
-# CHECK-NEXT: JCC_1 %bb.10, 5
-# CHECK: bb.5:
-# CHECK-NEXT: successors: %bb.6(0x30000000), %bb.7(0x50000000)
-# CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg
-# CHECK-NEXT: TEST64rr $rax, $rax
-# CHECK-NEXT: JCC_1 %bb.6, 4
-# CHECK: bb.7
-# CHECK-NEXT: successors: %bb.8(0x71555555), %bb.10(0x0eaaaaab)
-# CHECK: CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0
-# CHECK-NEXT: JCC_1 %bb.10, 5
-# CHECK: bb.8:
-# CHECK-NEXT: successors: %bb.9(0x04000000), %bb.7(0x7c000000)
-# CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg
-# CHECK-NEXT: TEST64rr $rax, $rax
-# CHECK-NEXT: JCC_1 %bb.7, 5
 name: foo
 body: |
+  ; CHECK-LABEL: name: foo
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK: TEST8ri $dl, 1, implicit-def $eflags, implicit killed $edx
+  ; CHECK: JCC_1 %bb.1, 5, implicit $eflags
+  ; CHECK: bb.2:
+  ; CHECK: successors: %bb.1(0x30000000), %bb.3(0x50000000)
+  ; CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load 8)
+  ; CHECK: TEST64rr $rax, $rax, implicit-def $eflags
+  ; CHECK: JCC_1 %bb.1, 4, implicit $eflags
+  ; CHECK: bb.3:
+  ; CHECK: successors: %bb.4(0x30000000), %bb.8(0x50000000)
+  ; CHECK: CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0, implicit-def $eflags :: (load 8)
+  ; CHECK: JCC_1 %bb.8, 5, implicit $eflags
+  ; CHECK: bb.4:
+  ; CHECK: successors: %bb.1(0x30000000), %bb.5(0x50000000)
+  ; CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load 8)
+  ; CHECK: TEST64rr $rax, $rax, implicit-def $eflags
+  ; CHECK: JCC_1 %bb.1, 4, implicit $eflags
+  ; CHECK: bb.5 (align 4):
+  ; CHECK: successors: %bb.6(0x71555555), %bb.8(0x0eaaaaab)
+  ; CHECK: CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0, implicit-def $eflags :: (load 8), (load 8)
+  ; CHECK: JCC_1 %bb.8, 5, implicit $eflags
+  ; CHECK: bb.6:
+  ; CHECK: successors: %bb.1(0x04000000), %bb.5(0x7c000000)
+  ; CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load 8)
+  ; CHECK: TEST64rr $rax, $rax, implicit-def $eflags
+  ; CHECK: JCC_1 %bb.5, 5, implicit $eflags
+  ; CHECK: bb.1:
+  ; CHECK: $ebp = XOR32rr undef $ebp, undef $ebp, implicit-def dead $eflags
+  ; CHECK: RETQ $eax
+  ; CHECK: bb.8:
+  ; CHECK: $ebp = XOR32rr undef $ebp, undef $ebp, implicit-def dead $eflags
+  ; CHECK: dead $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags, implicit-def $al
+  ; CHECK: RETQ $eax
  bb.0:
    successors: %bb.1(0x40000000), %bb.7(0x40000000)
Index: test/CodeGen/X86/tail-opts.ll
===================================================================
--- test/CodeGen/X86/tail-opts.ll
+++ test/CodeGen/X86/tail-opts.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false -post-RA-scheduler=true | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -post-RA-scheduler=true | FileCheck %s
 
 declare void @bar(i32)
 declare void @car(i32)
@@ -13,13 +14,44 @@
 ; BranchFolding should tail-merge the stores since they all precede
 ; direct branches to the same place.
 
-; CHECK-LABEL: tail_merge_me:
-; CHECK-NOT: GHJK
-; CHECK: movl $0, GHJK(%rip)
-; CHECK-NEXT: movl $1, HABC(%rip)
-; CHECK-NOT: GHJK
-
 define void @tail_merge_me() nounwind {
+; CHECK-LABEL: tail_merge_me:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq qux
+; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: je .LBB0_1
+; CHECK-NEXT: # %bb.6: # %A
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: callq bar
+; CHECK-NEXT: jmp .LBB0_4
+; CHECK-NEXT: .LBB0_1: # %next
+; CHECK-NEXT: callq qux
+; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: je .LBB0_3
+; CHECK-NEXT: # %bb.2: # %B
+; CHECK-NEXT: movl $1, %edi
+; CHECK-NEXT: callq car
+; CHECK-NEXT: jmp .LBB0_4
+; CHECK-NEXT: .LBB0_3: # %C
+; CHECK-NEXT: movl $2, %edi
+; CHECK-NEXT: callq dar
+; CHECK-NEXT: .LBB0_4: # %M
+; CHECK-NEXT: movl $0, {{.*}}(%rip)
+; CHECK-NEXT: movl $1, {{.*}}(%rip)
+; CHECK-NEXT: callq qux
+; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: je .LBB0_5
+; CHECK-NEXT: # %bb.7: # %return
+; CHECK-NEXT: movl $1000, %edi # imm = 0x3E8
+; CHECK-NEXT: callq ear
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB0_5: # %altret
+; CHECK-NEXT: movl $1001, %edi # imm = 0x3E9
+; CHECK-NEXT: callq far
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
 entry:
   %a = call i1 @qux()
   br i1 %a, label %A, label %next
@@ -60,15 +92,53 @@
 ; BranchFolding should tail-duplicate the indirect jump to avoid
 ; redundant branching.
-; CHECK-LABEL: tail_duplicate_me:
-; CHECK: movl $0, GHJK(%rip)
-; CHECK-NEXT: jmpq *%r
-; CHECK: movl $0, GHJK(%rip)
-; CHECK-NEXT: jmpq *%r
-; CHECK: movl $0, GHJK(%rip)
-; CHECK-NEXT: jmpq *%r
-
 define void @tail_duplicate_me() nounwind {
+; CHECK-LABEL: tail_duplicate_me:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq qux
+; CHECK-NEXT: movl $.Ltmp0, %edi
+; CHECK-NEXT: movl $.Ltmp1, %esi
+; CHECK-NEXT: movl %eax, %ebx
+; CHECK-NEXT: callq choose
+; CHECK-NEXT: movq %rax, %r14
+; CHECK-NEXT: testb $1, %bl
+; CHECK-NEXT: je .LBB1_1
+; CHECK-NEXT: # %bb.7: # %A
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: callq bar
+; CHECK-NEXT: movl $0, {{.*}}(%rip)
+; CHECK-NEXT: jmpq *%r14
+; CHECK-NEXT: .Ltmp0: # Block address taken
+; CHECK-NEXT: .LBB1_4: # %return
+; CHECK-NEXT: movl $1000, %edi # imm = 0x3E8
+; CHECK-NEXT: callq ear
+; CHECK-NEXT: jmp .LBB1_5
+; CHECK-NEXT: .LBB1_1: # %next
+; CHECK-NEXT: callq qux
+; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: je .LBB1_3
+; CHECK-NEXT: # %bb.2: # %B
+; CHECK-NEXT: movl $1, %edi
+; CHECK-NEXT: callq car
+; CHECK-NEXT: movl $0, {{.*}}(%rip)
+; CHECK-NEXT: jmpq *%r14
+; CHECK-NEXT: .Ltmp1: # Block address taken
+; CHECK-NEXT: .LBB1_6: # %altret
+; CHECK-NEXT: movl $1001, %edi # imm = 0x3E9
+; CHECK-NEXT: callq far
+; CHECK-NEXT: .LBB1_5: # %return
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB1_3: # %C
+; CHECK-NEXT: movl $2, %edi
+; CHECK-NEXT: callq dar
+; CHECK-NEXT: movl $0, {{.*}}(%rip)
+; CHECK-NEXT: jmpq *%r14
 entry:
   %a = call i1 @qux()
   %c = call i8* @choose(i8* blockaddress(@tail_duplicate_me, %return),
@@ -107,23 +177,26 @@
 ; BranchFolding shouldn't try to merge the tails of two blocks
 ; with only a branch in common, regardless of the fallthrough situation.
 
-; CHECK-LABEL: dont_merge_oddly:
-; CHECK-NOT: ret
-; CHECK: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
-; CHECK-NEXT: jbe .LBB2_3
-; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
-; CHECK-NEXT: ja .LBB2_4
-; CHECK-NEXT: .LBB2_2:
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB2_3:
-; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
-; CHECK-NEXT: jbe .LBB2_2
-; CHECK-NEXT: .LBB2_4:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: ret
-
 define i1 @dont_merge_oddly(float* %result) nounwind {
+; CHECK-LABEL: dont_merge_oddly:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: ucomiss %xmm1, %xmm2
+; CHECK-NEXT: jbe .LBB2_3
+; CHECK-NEXT: # %bb.1: # %bb
+; CHECK-NEXT: ucomiss %xmm0, %xmm1
+; CHECK-NEXT: ja .LBB2_4
+; CHECK-NEXT: .LBB2_2: # %bb30
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB2_3: # %bb21
+; CHECK-NEXT: ucomiss %xmm0, %xmm2
+; CHECK-NEXT: jbe .LBB2_2
+; CHECK-NEXT: .LBB2_4: # %bb26
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: retq
 entry:
   %tmp4 = getelementptr float, float* %result, i32 2
   %tmp5 = load float, float* %tmp4, align 4
@@ -151,22 +224,9 @@
 ; Do any-size tail-merging when two candidate blocks will both require
 ; an unconditional jump to complete a two-way conditional branch.
-
-; CHECK-LABEL: c_expand_expr_stmt:
 ;
 ; This test only works when register allocation happens to use %rax for both
 ; load addresses.
-;
-; CHE: jmp .LBB3_11
-; CHE-NEXT: .LBB3_9:
-; CHE-NEXT: movq 8(%rax), %rax
-; CHE-NEXT: xorl %edx, %edx
-; CHE-NEXT: movb 16(%rax), %al
-; CHE-NEXT: cmpb $16, %al
-; CHE-NEXT: je .LBB3_11
-; CHE-NEXT: cmpb $23, %al
-; CHE-NEXT: jne .LBB3_14
-; CHE-NEXT: .LBB3_11:
 
 %0 = type { %struct.rtx_def* }
 %struct.lang_decl = type opaque
 %struct.rtx_def = type { i16, i8, i8, [1 x %union.u] }
 %struct.tree_common = type { i8, i8, i8 }
 %struct.tree_decl = type { %struct.tree_common, i8*, %struct.lang_decl* }
 %union.tree_node = type { %struct.tree_decl }
 %union.u = type { [1 x i64] }
 
 define fastcc void @c_expand_expr_stmt(%union.tree_node* %expr) nounwind {
+; CHECK-LABEL: c_expand_expr_stmt:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB3_17
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movb 0, %bl
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB3_16
+; CHECK-NEXT: # %bb.2: # %bb.i
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB3_16
+; CHECK-NEXT: # %bb.3: # %lvalue_p.exit
+; CHECK-NEXT: movq 0, %rax
+; CHECK-NEXT: movzbl (%rax), %ecx
+; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: je .LBB3_12
+; CHECK-NEXT: # %bb.4: # %lvalue_p.exit
+; CHECK-NEXT: cmpl $2, %ecx
+; CHECK-NEXT: jne .LBB3_5
+; CHECK-NEXT: # %bb.6: # %bb.i1
+; CHECK-NEXT: movq 32(%rax), %rax
+; CHECK-NEXT: movzbl 16(%rax), %ecx
+; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: je .LBB3_10
+; CHECK-NEXT: # %bb.7: # %bb.i1
+; CHECK-NEXT: cmpl $2, %ecx
+; CHECK-NEXT: jne .LBB3_8
+; CHECK-NEXT: # %bb.9: # %bb.i.i
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: callq lvalue_p
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB3_15
+; CHECK-NEXT: jmp .LBB3_17
+; CHECK-NEXT: .LBB3_16: # %bb1
+; CHECK-NEXT: cmpb $23, %bl
+; CHECK-NEXT: .LBB3_17: # %bb3
+; CHECK-NEXT: .LBB3_12: # %bb2.i3
+; CHECK-NEXT: movq 8(%rax), %rax
+; CHECK-NEXT: movb 16(%rax), %cl
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpb $23, %cl
+; CHECK-NEXT: je .LBB3_14
+; CHECK-NEXT: # %bb.13: # %bb2.i3
+; CHECK-NEXT: cmpb $16, %cl
+; CHECK-NEXT: je .LBB3_14
+; CHECK-NEXT: jmp .LBB3_17
+; CHECK-NEXT: .LBB3_5:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB3_15
+; CHECK-NEXT: jmp .LBB3_17
+; CHECK-NEXT: .LBB3_10: # %bb2.i.i2
+; CHECK-NEXT: movq 8(%rax), %rax
+; CHECK-NEXT: movb 16(%rax), %cl
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpb $16, %cl
+; CHECK-NEXT: je .LBB3_14
+; CHECK-NEXT: # %bb.11: # %bb2.i.i2
+; CHECK-NEXT: cmpb $23, %cl
+; CHECK-NEXT: je .LBB3_14
+; CHECK-NEXT: jmp .LBB3_17
+; CHECK-NEXT: .LBB3_8:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: .LBB3_14: # %lvalue_p.exit4
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB3_17
+; CHECK-NEXT: .LBB3_15: # %lvalue_p.exit4
+; CHECK-NEXT: testb %bl, %bl
 entry:
   %tmp4 = load i8, i8* null, align 8 ; [#uses=3]
   switch i8 %tmp4, label %bb3 [
@@ -274,13 +408,17 @@
 ; instructions are involved. This function should have only
 ; one ret instruction.
 
-; CHECK-LABEL: foo:
-; CHECK: callq func
-; CHECK-NEXT: popq
-; CHECK-NEXT: .LBB4_2:
-; CHECK-NEXT: ret
-
 define void @foo(i1* %V) nounwind {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: je .LBB4_2
+; CHECK-NEXT: # %bb.1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq func
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .LBB4_2: # %return
+; CHECK-NEXT: retq
 entry:
   %t0 = icmp eq i1* %V, null
   br i1 %t0, label %return, label %bb
@@ -297,15 +435,25 @@
 ; one - One instruction may be tail-duplicated even with optsize.
-; CHECK-LABEL: one: -; CHECK: j{{.*}} tail_call_me -; CHECK: j{{.*}} tail_call_me - @XYZ = external global i32 declare void @tail_call_me() define void @one(i32 %v) nounwind optsize { +; CHECK-LABEL: one: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je .LBB5_3 +; CHECK-NEXT: # %bb.1: # %bby +; CHECK-NEXT: cmpl $16, %edi +; CHECK-NEXT: je .LBB5_4 +; CHECK-NEXT: # %bb.2: # %bb7 +; CHECK-NEXT: jmp tail_call_me # TAILCALL +; CHECK-NEXT: .LBB5_3: # %bbx +; CHECK-NEXT: cmpl $128, %edi +; CHECK-NEXT: jne tail_call_me # TAILCALL +; CHECK-NEXT: .LBB5_4: # %return +; CHECK-NEXT: retq entry: %0 = icmp eq i32 %v, 0 br i1 %0, label %bbx, label %bby @@ -336,14 +484,19 @@ ; tail instead of one. This is too much to be merged, given ; the optsize attribute. -; CHECK-LABEL: two: -; CHECK-NOT: XYZ -; CHECK: ret -; CHECK: movl $0, XYZ(%rip) -; CHECK: movl $1, XYZ(%rip) -; CHECK-NOT: XYZ - define void @two() nounwind optsize { +; CHECK-LABEL: two: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je .LBB6_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB6_1: # %bb7 +; CHECK-NEXT: movl $0, {{.*}}(%rip) +; CHECK-NEXT: movl $1, {{.*}}(%rip) entry: %0 = icmp eq i32 undef, 0 br i1 %0, label %bbx, label %bby @@ -374,14 +527,19 @@ ; two_minsize - Same as two, but with minsize instead of optsize. -; CHECK-LABEL: two_minsize: -; CHECK-NOT: XYZ -; CHECK: ret -; CHECK: movl $0, XYZ(%rip) -; CHECK: movl $1, XYZ(%rip) -; CHECK-NOT: XYZ - define void @two_minsize() nounwind minsize { +; CHECK-LABEL: two_minsize: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je .LBB7_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB7_1: # %bb7 +; CHECK-NEXT: movl $0, {{.*}}(%rip) +; CHECK-NEXT: movl $1, {{.*}}(%rip) entry: %0 = icmp eq i32 undef, 0 br i1 %0, label %bbx, label %bby @@ -413,13 +571,20 @@ ; two_nosize - Same as two, but without the optsize attribute. ; Now two instructions are enough to be tail-duplicated. -; CHECK-LABEL: two_nosize: -; CHECK: movl $0, XYZ(%rip) -; CHECK: jmp tail_call_me -; CHECK: movl $0, XYZ(%rip) -; CHECK: jmp tail_call_me - define void @two_nosize() nounwind { +; CHECK-LABEL: two_nosize: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: # %bb.1: # %bby +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je .LBB8_2 +; CHECK-NEXT: # %bb.4: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB8_2: # %bb7 +; CHECK-NEXT: movl $0, {{.*}}(%rip) +; CHECK-NEXT: jmp tail_call_me # TAILCALL entry: %0 = icmp eq i32 undef, 0 br i1 %0, label %bbx, label %bby @@ -451,12 +616,19 @@ ; Tail-merging should merge the two ret instructions since one side ; can fall-through into the ret and the other side has to branch anyway. 
-; CHECK-LABEL: TESTE: -; CHECK: ret -; CHECK-NOT: ret -; CHECK: size TESTE - define i64 @TESTE(i64 %parami, i64 %paraml) nounwind readnone { +; CHECK-LABEL: TESTE: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmovgq %rdi, %rax +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: jle .LBB9_2 +; CHECK-NEXT: # %bb.1: # %bb.nph +; CHECK-NEXT: imulq %rdi, %rsi +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: .LBB9_2: # %for.end +; CHECK-NEXT: retq entry: %cmp = icmp slt i64 %parami, 1 ; [#uses=1] %varx.0 = select i1 %cmp, i64 1, i64 %parami ; [#uses=1] @@ -476,15 +648,34 @@ ; out-of-line after the main return, so we should try to eliminate as many of ; them as possible. -; CHECK-LABEL: merge_aborts: -; CHECK-NOT: callq abort -; CHECK: ret -; CHECK: callq abort -; CHECK-NOT: callq abort -; CHECK: .Lfunc_end{{.*}}: - declare void @abort() define void @merge_aborts() { +; CHECK-LABEL: merge_aborts: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB10_5 +; CHECK-NEXT: # %bb.1: # %cont1 +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB10_5 +; CHECK-NEXT: # %bb.2: # %cont2 +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB10_5 +; CHECK-NEXT: # %bb.3: # %cont3 +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB10_5 +; CHECK-NEXT: # %bb.4: # %cont4 +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB10_5: # %abort1 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq abort entry: %c1 = call i1 @qux() br i1 %c1, label %cont1, label %abort1 @@ -516,18 +707,37 @@ ; Use alternating abort functions so that the blocks we wish to merge are not ; layout successors during branch folding. -; CHECK-LABEL: merge_alternating_aborts: -; CHECK-NOT: callq abort -; CHECK: ret -; CHECK: callq abort -; CHECK: callq alt_abort -; CHECK-NOT: callq abort -; CHECK-NOT: callq alt_abort -; CHECK: .Lfunc_end{{.*}}: - declare void @alt_abort() define void @merge_alternating_aborts() { +; CHECK-LABEL: merge_alternating_aborts: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB11_5 +; CHECK-NEXT: # %bb.1: # %cont1 +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB11_6 +; CHECK-NEXT: # %bb.2: # %cont2 +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB11_5 +; CHECK-NEXT: # %bb.3: # %cont3 +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB11_6 +; CHECK-NEXT: # %bb.4: # %cont4 +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB11_5: # %abort1 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq abort +; CHECK-NEXT: .LBB11_6: # %abort2 +; CHECK-NEXT: callq alt_abort entry: %c1 = call i1 @qux() br i1 %c1, label %cont1, label %abort1 Index: test/CodeGen/X86/tail-threshold.ll =================================================================== --- test/CodeGen/X86/tail-threshold.ll +++ test/CodeGen/X86/tail-threshold.ll @@ -1,17 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-pc-linux-gnu -tail-merge-threshold 2 < %s | FileCheck %s ; Test that we still do some merging if a block has more than ; tail-merge-threshold predecessors. 
-; CHECK: callq bar -; CHECK: callq bar -; CHECK: callq bar -; CHECK-NOT: callq - declare void @bar() -define void @foo(i32 %xxx) { -entry: +define void @foo(i32 %xxx) nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: cmpl $3, %edi +; CHECK-NEXT: ja .LBB0_4 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8) +; CHECK-NEXT: .LBB0_3: # %bb3 +; CHECK-NEXT: callq bar +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_4: # %bb4 +; CHECK-NEXT: callq bar +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq switch i32 %xxx, label %bb4 [ i32 0, label %bb0 i32 1, label %bb1 Index: test/CodeGen/X86/test-shrink-bug.ll =================================================================== --- test/CodeGen/X86/test-shrink-bug.ll +++ test/CodeGen/X86/test-shrink-bug.ll @@ -52,15 +52,13 @@ ; CHECK-X86-NEXT: cmpb $123, {{[0-9]+}}(%esp) ; CHECK-X86-NEXT: sete %al ; CHECK-X86-NEXT: testl $263, %ecx ## imm = 0x107 -; CHECK-X86-NEXT: je LBB1_2 +; CHECK-X86-NEXT: je LBB1_3 ; CHECK-X86-NEXT: ## %bb.1: ; CHECK-X86-NEXT: testb %al, %al -; CHECK-X86-NEXT: jne LBB1_2 -; CHECK-X86-NEXT: ## %bb.3: ## %no +; CHECK-X86-NEXT: jne LBB1_3 +; CHECK-X86-NEXT: ## %bb.2: ## %no ; CHECK-X86-NEXT: calll _bar -; CHECK-X86-NEXT: addl $12, %esp -; CHECK-X86-NEXT: retl -; CHECK-X86-NEXT: LBB1_2: ## %yes +; CHECK-X86-NEXT: LBB1_3: ## %yes ; CHECK-X86-NEXT: addl $12, %esp ; CHECK-X86-NEXT: retl ; @@ -69,7 +67,7 @@ ; CHECK-X64-NEXT: pushq %rax ; CHECK-X64-NEXT: .cfi_def_cfa_offset 16 ; CHECK-X64-NEXT: testl $263, %edi # imm = 0x107 -; CHECK-X64-NEXT: je .LBB1_2 +; CHECK-X64-NEXT: je .LBB1_3 ; CHECK-X64-NEXT: # %bb.1: ; CHECK-X64-NEXT: pand {{.*}}(%rip), %xmm0 ; CHECK-X64-NEXT: pcmpeqd {{.*}}(%rip), %xmm0 @@ -77,14 +75,10 @@ ; CHECK-X64-NEXT: pand %xmm0, %xmm1 ; CHECK-X64-NEXT: pextrw $4, %xmm1, %eax ; CHECK-X64-NEXT: testb $1, %al -; CHECK-X64-NEXT: jne .LBB1_2 -; CHECK-X64-NEXT: # %bb.3: # %no +; CHECK-X64-NEXT: jne .LBB1_3 +; CHECK-X64-NEXT: # %bb.2: # %no ; CHECK-X64-NEXT: callq bar -; CHECK-X64-NEXT: popq %rax -; CHECK-X64-NEXT: .cfi_def_cfa_offset 8 -; CHECK-X64-NEXT: retq -; CHECK-X64-NEXT: .LBB1_2: # %yes -; CHECK-X64-NEXT: .cfi_def_cfa_offset 16 +; CHECK-X64-NEXT: .LBB1_3: # %yes ; CHECK-X64-NEXT: popq %rax ; CHECK-X64-NEXT: .cfi_def_cfa_offset 8 ; CHECK-X64-NEXT: retq