Regenerate the FileCheck assertions of three X86 slow-division bypass tests
with utils/update_llc_test_checks.py: the loose CHECK / CHECK-NOT patterns are
replaced by CHECK-NEXT sequences covering each function's llc output.

The "# kill:" assertions carry the <def>/<kill> operand tags; without them the
CHECK-NEXT lines cannot match the kill comments emitted by llc.

NOTE(review): Test_use_rem_imm_reg still computes "sdiv i32 4, %a", so its
assertions are identical to Test_use_div_imm_reg. Switching it to srem needs
regenerated checks and is not part of this patch.

Index: test/CodeGen/X86/atom-bypass-slow-division-64.ll
===================================================================
--- test/CodeGen/X86/atom-bypass-slow-division-64.ll
+++ test/CodeGen/X86/atom-bypass-slow-division-64.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mcpu=atom -march=x86-64 | FileCheck %s
 
 target triple = "x86_64-unknown-linux-gnu"
@@ -6,44 +7,70 @@
 
 define i64 @Test_get_quotient(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: Test_get_quotient:
-; CHECK: movq %rdi, %rax
-; CHECK: orq %rsi, %rax
-; CHECK-NEXT: testq $-65536, %rax
-; CHECK-NEXT: je
-; CHECK: idivq
-; CHECK: ret
-; CHECK: divw
-; CHECK: ret
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    orq %rsi, %rax
+; CHECK-NEXT:    testq $-65536, %rax # imm = 0xFFFF0000
+; CHECK-NEXT:    je .LBB0_1
+; CHECK-NEXT:  # BB#2:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    cqto
+; CHECK-NEXT:    idivq %rsi
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB0_1:
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    divw %si
+; CHECK-NEXT:    movzwl %ax, %eax
+; CHECK-NEXT:    retq
   %result = sdiv i64 %a, %b
   ret i64 %result
 }
 
 define i64 @Test_get_remainder(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: Test_get_remainder:
-; CHECK: movq %rdi, %rax
-; CHECK: orq %rsi, %rax
-; CHECK-NEXT: testq $-65536, %rax
-; CHECK-NEXT: je
-; CHECK: idivq
-; CHECK: ret
-; CHECK: divw
-; CHECK: ret
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    orq %rsi, %rax
+; CHECK-NEXT:    testq $-65536, %rax # imm = 0xFFFF0000
+; CHECK-NEXT:    je .LBB1_1
+; CHECK-NEXT:  # BB#2:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    cqto
+; CHECK-NEXT:    idivq %rsi
+; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB1_1:
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    divw %si
+; CHECK-NEXT:    movzwl %dx, %eax
+; CHECK-NEXT:    retq
   %result = srem i64 %a, %b
   ret i64 %result
 }
 
 define i64 @Test_get_quotient_and_remainder(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: Test_get_quotient_and_remainder:
-; CHECK: movq %rdi, %rax
-; CHECK: orq %rsi, %rax
-; CHECK-NEXT: testq $-65536, %rax
-; CHECK-NEXT: je
-; CHECK: idivq
-; CHECK: divw
-; CHECK: addq
-; CHECK: ret
-; CHECK-NOT: idivq
-; CHECK-NOT: divw
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    orq %rsi, %rax
+; CHECK-NEXT:    testq $-65536, %rax # imm = 0xFFFF0000
+; CHECK-NEXT:    je .LBB2_1
+; CHECK-NEXT:  # BB#2:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    cqto
+; CHECK-NEXT:    idivq %rsi
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB2_1:
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    divw %si
+; CHECK-NEXT:    movzwl %ax, %eax
+; CHECK-NEXT:    movzwl %dx, %edx
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    retq
   %resultdiv = sdiv i64 %a, %b
   %resultrem = srem i64 %a, %b
   %result = add i64 %resultdiv, %resultrem
Index: test/CodeGen/X86/atom-bypass-slow-division.ll
===================================================================
--- test/CodeGen/X86/atom-bypass-slow-division.ll
+++ test/CodeGen/X86/atom-bypass-slow-division.ll
@@ -1,42 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
 
 define i32 @Test_get_quotient(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: Test_get_quotient:
-; CHECK: orl %ecx, %edx
-; CHECK-NEXT: testl $-256, %edx
-; CHECK-NEXT: je
-; CHECK: idivl
-; CHECK: ret
-; CHECK: divb
-; CHECK: ret
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl %eax, %edx
+; CHECK-NEXT:    orl %ecx, %edx
+; CHECK-NEXT:    testl $-256, %edx
+; CHECK-NEXT:    je .LBB0_1
+; CHECK-NEXT:  # BB#2:
+; CHECK-NEXT:    cltd
+; CHECK-NEXT:    idivl %ecx
+; CHECK-NEXT:    retl
+; CHECK-NEXT:  .LBB0_1:
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    # kill: %EAX<def> %EAX<kill> %AX<def>
+; CHECK-NEXT:    divb %cl
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    retl
   %result = sdiv i32 %a, %b
   ret i32 %result
 }
 
 define i32 @Test_get_remainder(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: Test_get_remainder:
-; CHECK: orl %ecx, %edx
-; CHECK-NEXT: testl $-256, %edx
-; CHECK-NEXT: je
-; CHECK: idivl
-; CHECK: ret
-; CHECK: divb
-; CHECK: ret
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl %eax, %edx
+; CHECK-NEXT:    orl %ecx, %edx
+; CHECK-NEXT:    testl $-256, %edx
+; CHECK-NEXT:    je .LBB1_1
+; CHECK-NEXT:  # BB#2:
+; CHECK-NEXT:    cltd
+; CHECK-NEXT:    idivl %ecx
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    retl
+; CHECK-NEXT:  .LBB1_1:
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    # kill: %EAX<def> %EAX<kill> %AX<def>
+; CHECK-NEXT:    divb %cl
+; CHECK-NEXT:    movzbl %ah, %eax # NOREX
+; CHECK-NEXT:    retl
   %result = srem i32 %a, %b
   ret i32 %result
 }
 
 define i32 @Test_get_quotient_and_remainder(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: Test_get_quotient_and_remainder:
-; CHECK: orl %ecx, %edx
-; CHECK-NEXT: testl $-256, %edx
-; CHECK-NEXT: je
-; CHECK: idivl
-; CHECK: divb
-; CHECK: addl
-; CHECK: ret
-; CHECK-NOT: idivl
-; CHECK-NOT: divb
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl %eax, %edx
+; CHECK-NEXT:    orl %ecx, %edx
+; CHECK-NEXT:    testl $-256, %edx
+; CHECK-NEXT:    je .LBB2_1
+; CHECK-NEXT:  # BB#2:
+; CHECK-NEXT:    cltd
+; CHECK-NEXT:    idivl %ecx
+; CHECK-NEXT:    addl %edx, %eax
+; CHECK-NEXT:    retl
+; CHECK-NEXT:  .LBB2_1:
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    # kill: %EAX<def> %EAX<kill> %AX<def>
+; CHECK-NEXT:    divb %cl
+; CHECK-NEXT:    movzbl %ah, %edx # NOREX
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    addl %edx, %eax
+; CHECK-NEXT:    retl
   %resultdiv = sdiv i32 %a, %b
   %resultrem = srem i32 %a, %b
   %result = add i32 %resultdiv, %resultrem
@@ -45,12 +78,48 @@
 
 define i32 @Test_use_div_and_idiv(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: Test_use_div_and_idiv:
-; CHECK: idivl
-; CHECK: divb
-; CHECK: divl
-; CHECK: divb
-; CHECK: addl
-; CHECK: ret
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pushl %ebx
+; CHECK-NEXT:    pushl %edi
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT:    movl %ecx, %edi
+; CHECK-NEXT:    orl %ebx, %edi
+; CHECK-NEXT:    testl $-256, %edi
+; CHECK-NEXT:    je .LBB3_1
+; CHECK-NEXT:  # BB#2:
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    cltd
+; CHECK-NEXT:    idivl %ebx
+; CHECK-NEXT:    movl %eax, %esi
+; CHECK-NEXT:    testl $-256, %edi
+; CHECK-NEXT:    jne .LBB3_5
+; CHECK-NEXT:    jmp .LBB3_4
+; CHECK-NEXT:  .LBB3_1:
+; CHECK-NEXT:    movzbl %cl, %eax
+; CHECK-NEXT:    # kill: %EAX<def> %EAX<kill> %AX<def>
+; CHECK-NEXT:    divb %bl
+; CHECK-NEXT:    movzbl %al, %esi
+; CHECK-NEXT:    testl $-256, %edi
+; CHECK-NEXT:    je .LBB3_4
+; CHECK-NEXT:  .LBB3_5:
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    divl %ebx
+; CHECK-NEXT:    jmp .LBB3_6
+; CHECK-NEXT:  .LBB3_4:
+; CHECK-NEXT:    movzbl %cl, %eax
+; CHECK-NEXT:    # kill: %EAX<def> %EAX<kill> %AX<def>
+; CHECK-NEXT:    divb %bl
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:  .LBB3_6:
+; CHECK-NEXT:    addl %eax, %esi
+; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    popl %edi
+; CHECK-NEXT:    popl %ebx
+; CHECK-NEXT:    retl
   %resultidiv = sdiv i32 %a, %b
   %resultdiv = udiv i32 %a, %b
   %result = add i32 %resultidiv, %resultdiv
@@ -59,34 +128,72 @@
 
 define i32 @Test_use_div_imm_imm() nounwind {
 ; CHECK-LABEL: Test_use_div_imm_imm:
-; CHECK: movl $64
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl $64, %eax
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    retl
   %resultdiv = sdiv i32 256, 4
   ret i32 %resultdiv
 }
 
 define i32 @Test_use_div_reg_imm(i32 %a) nounwind {
 ; CHECK-LABEL: Test_use_div_reg_imm:
-; CHECK-NOT: test
-; CHECK-NOT: idiv
-; CHECK-NOT: divb
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl $1041204193, %eax # imm = 0x3E0F83E1
+; CHECK-NEXT:    imull {{[0-9]+}}(%esp)
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    sarl $3, %edx
+; CHECK-NEXT:    shrl $31, %eax
+; CHECK-NEXT:    leal (%edx,%eax), %eax
+; CHECK-NEXT:    retl
   %resultdiv = sdiv i32 %a, 33
   ret i32 %resultdiv
 }
 
 define i32 @Test_use_rem_reg_imm(i32 %a) nounwind {
 ; CHECK-LABEL: Test_use_rem_reg_imm:
-; CHECK-NOT: test
-; CHECK-NOT: idiv
-; CHECK-NOT: divb
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl $1041204193, %edx # imm = 0x3E0F83E1
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    imull %edx
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    sarl $3, %edx
+; CHECK-NEXT:    shrl $31, %eax
+; CHECK-NEXT:    addl %eax, %edx
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    shll $5, %eax
+; CHECK-NEXT:    addl %edx, %eax
+; CHECK-NEXT:    subl %eax, %ecx
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    retl
   %resultrem = srem i32 %a, 33
   ret i32 %resultrem
 }
 
 define i32 @Test_use_divrem_reg_imm(i32 %a) nounwind {
 ; CHECK-LABEL: Test_use_divrem_reg_imm:
-; CHECK-NOT: test
-; CHECK-NOT: idiv
-; CHECK-NOT: divb
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl $1041204193, %edx # imm = 0x3E0F83E1
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    imull %edx
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    sarl $3, %edx
+; CHECK-NEXT:    shrl $31, %eax
+; CHECK-NEXT:    addl %eax, %edx
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    shll $5, %eax
+; CHECK-NEXT:    addl %edx, %eax
+; CHECK-NEXT:    subl %eax, %ecx
+; CHECK-NEXT:    addl %edx, %ecx
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    retl
   %resultdiv = sdiv i32 %a, 33
   %resultrem = srem i32 %a, 33
   %result = add i32 %resultdiv, %resultrem
@@ -95,18 +202,44 @@
 
 define i32 @Test_use_div_imm_reg(i32 %a) nounwind {
 ; CHECK-LABEL: Test_use_div_imm_reg:
-; CHECK: test
-; CHECK: idiv
-; CHECK: divb
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    testl $-256, %ecx
+; CHECK-NEXT:    je .LBB8_1
+; CHECK-NEXT:  # BB#2:
+; CHECK-NEXT:    movl $4, %eax
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    idivl %ecx
+; CHECK-NEXT:    retl
+; CHECK-NEXT:  .LBB8_1:
+; CHECK-NEXT:    movb $4, %al
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    # kill: %EAX<def> %EAX<kill> %AX<def>
+; CHECK-NEXT:    divb %cl
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    retl
   %resultdiv = sdiv i32 4, %a
   ret i32 %resultdiv
 }
 
 define i32 @Test_use_rem_imm_reg(i32 %a) nounwind {
 ; CHECK-LABEL: Test_use_rem_imm_reg:
-; CHECK: test
-; CHECK: idiv
-; CHECK: divb
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    testl $-256, %ecx
+; CHECK-NEXT:    je .LBB9_1
+; CHECK-NEXT:  # BB#2:
+; CHECK-NEXT:    movl $4, %eax
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    idivl %ecx
+; CHECK-NEXT:    retl
+; CHECK-NEXT:  .LBB9_1:
+; CHECK-NEXT:    movb $4, %al
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    # kill: %EAX<def> %EAX<kill> %AX<def>
+; CHECK-NEXT:    divb %cl
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    retl
   %resultdiv = sdiv i32 4, %a
   ret i32 %resultdiv
 }
Index: test/CodeGen/X86/slow-div.ll
===================================================================
--- test/CodeGen/X86/slow-div.ll
+++ test/CodeGen/X86/slow-div.ll
@@ -1,26 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivl-to-divb < %s | FileCheck -check-prefix=DIV32 %s
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivq-to-divw < %s | FileCheck -check-prefix=DIV64 %s
 
 define i32 @div32(i32 %a, i32 %b) {
-entry:
 ; DIV32-LABEL: div32:
-; DIV32: orl %{{.*}}, [[REG:%[a-z]+]]
-; DIV32: testl $-256, [[REG]]
-; DIV32: divb
+; DIV32:       # BB#0: # %entry
+; DIV32-NEXT:    movl %edi, %eax
+; DIV32-NEXT:    orl %esi, %eax
+; DIV32-NEXT:    testl $-256, %eax
+; DIV32-NEXT:    je .LBB0_1
+; DIV32-NEXT:  # BB#2:
+; DIV32-NEXT:    movl %edi, %eax
+; DIV32-NEXT:    cltd
+; DIV32-NEXT:    idivl %esi
+; DIV32-NEXT:    retq
+; DIV32-NEXT:  .LBB0_1:
+; DIV32-NEXT:    movzbl %dil, %eax
+; DIV32-NEXT:    # kill: %EAX<def> %EAX<kill> %AX<def>
+; DIV32-NEXT:    divb %sil
+; DIV32-NEXT:    movzbl %al, %eax
+; DIV32-NEXT:    retq
+;
 ; DIV64-LABEL: div32:
-; DIV64-NOT: divb
+; DIV64:       # BB#0: # %entry
+; DIV64-NEXT:    movl %edi, %eax
+; DIV64-NEXT:    cltd
+; DIV64-NEXT:    idivl %esi
+; DIV64-NEXT:    retq
+entry:
   %div = sdiv i32 %a, %b
   ret i32 %div
 }
 
 define i64 @div64(i64 %a, i64 %b) {
-entry:
 ; DIV32-LABEL: div64:
-; DIV32-NOT: divw
+; DIV32:       # BB#0: # %entry
+; DIV32-NEXT:    movq %rdi, %rax
+; DIV32-NEXT:    cqto
+; DIV32-NEXT:    idivq %rsi
+; DIV32-NEXT:    retq
+;
 ; DIV64-LABEL: div64:
-; DIV64: orq %{{.*}}, [[REG:%[a-z]+]]
-; DIV64: testq $-65536, [[REG]]
-; DIV64: divw
+; DIV64:       # BB#0: # %entry
+; DIV64-NEXT:    movq %rdi, %rax
+; DIV64-NEXT:    orq %rsi, %rax
+; DIV64-NEXT:    testq $-65536, %rax # imm = 0xFFFF0000
+; DIV64-NEXT:    je .LBB1_1
+; DIV64-NEXT:  # BB#2:
+; DIV64-NEXT:    movq %rdi, %rax
+; DIV64-NEXT:    cqto
+; DIV64-NEXT:    idivq %rsi
+; DIV64-NEXT:    retq
+; DIV64-NEXT:  .LBB1_1:
+; DIV64-NEXT:    xorl %edx, %edx
+; DIV64-NEXT:    movl %edi, %eax
+; DIV64-NEXT:    divw %si
+; DIV64-NEXT:    movzwl %ax, %eax
+; DIV64-NEXT:    retq
+entry:
   %div = sdiv i64 %a, %b
   ret i64 %div
 }
@@ -29,14 +66,36 @@
 
 define i32 @div32_optsize(i32 %a, i32 %b) optsize {
 ; DIV32-LABEL: div32_optsize:
-; DIV32-NOT: divb
+; DIV32:       # BB#0:
+; DIV32-NEXT:    movl %edi, %eax
+; DIV32-NEXT:    cltd
+; DIV32-NEXT:    idivl %esi
+; DIV32-NEXT:    retq
+;
+; DIV64-LABEL: div32_optsize:
+; DIV64:       # BB#0:
+; DIV64-NEXT:    movl %edi, %eax
+; DIV64-NEXT:    cltd
+; DIV64-NEXT:    idivl %esi
+; DIV64-NEXT:    retq
   %div = sdiv i32 %a, %b
   ret i32 %div
 }
 
 define i32 @div32_minsize(i32 %a, i32 %b) minsize {
 ; DIV32-LABEL: div32_minsize:
-; DIV32-NOT: divb
+; DIV32:       # BB#0:
+; DIV32-NEXT:    movl %edi, %eax
+; DIV32-NEXT:    cltd
+; DIV32-NEXT:    idivl %esi
+; DIV32-NEXT:    retq
+;
+; DIV64-LABEL: div32_minsize:
+; DIV64:       # BB#0:
+; DIV64-NEXT:    movl %edi, %eax
+; DIV64-NEXT:    cltd
+; DIV64-NEXT:    idivl %esi
+; DIV64-NEXT:    retq
   %div = sdiv i32 %a, %b
   ret i32 %div
 }