Index: llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division-64.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division-64.ll
+++ llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division-64.ll
@@ -1,143 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mcpu=atom -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
-; RUN: llc < %s -mcpu=sandybridge -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=SNB
-
-; Additional tests for 64-bit divide bypass
-
-define i64 @Test_get_quotient(i64 %a, i64 %b) nounwind {
-; CHECK-LABEL: Test_get_quotient:
-; CHECK: # BB#0:
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
-; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: testq %rcx, %rax
-; CHECK-NEXT: je .LBB0_1
-; CHECK-NEXT: # BB#2:
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: cqto
-; CHECK-NEXT: idivq %rsi
-; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB0_1:
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: divl %esi
-; CHECK-NEXT: # kill: %EAX %EAX %RAX
-; CHECK-NEXT: retq
-;
-; SNB-LABEL: Test_get_quotient:
-; SNB: # BB#0:
-; SNB-NEXT: movq %rdi, %rax
-; SNB-NEXT: orq %rsi, %rax
-; SNB-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
-; SNB-NEXT: testq %rcx, %rax
-; SNB-NEXT: je .LBB0_1
-; SNB-NEXT: # BB#2:
-; SNB-NEXT: movq %rdi, %rax
-; SNB-NEXT: cqto
-; SNB-NEXT: idivq %rsi
-; SNB-NEXT: retq
-; SNB-NEXT: .LBB0_1:
-; SNB-NEXT: xorl %edx, %edx
-; SNB-NEXT: movl %edi, %eax
-; SNB-NEXT: divl %esi
-; SNB-NEXT: # kill: %EAX %EAX %RAX
-; SNB-NEXT: retq
- %result = sdiv i64 %a, %b
- ret i64 %result
-}
-
-define i64 @Test_get_remainder(i64 %a, i64 %b) nounwind {
-; CHECK-LABEL: Test_get_remainder:
-; CHECK: # BB#0:
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
-; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: testq %rcx, %rax
-; CHECK-NEXT: je .LBB1_1
-; CHECK-NEXT: # BB#2:
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: cqto
-; CHECK-NEXT: idivq %rsi
-; CHECK-NEXT: movq %rdx, %rax
-; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB1_1:
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: divl %esi
-; CHECK-NEXT: # kill: %EDX %EDX %RDX
-; CHECK-NEXT: movq %rdx, %rax
-; CHECK-NEXT: retq
-;
-; SNB-LABEL: Test_get_remainder:
-; SNB: # BB#0:
-; SNB-NEXT: movq %rdi, %rax
-; SNB-NEXT: orq %rsi, %rax
-; SNB-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
-; SNB-NEXT: testq %rcx, %rax
-; SNB-NEXT: je .LBB1_1
-; SNB-NEXT: # BB#2:
-; SNB-NEXT: movq %rdi, %rax
-; SNB-NEXT: cqto
-; SNB-NEXT: idivq %rsi
-; SNB-NEXT: movq %rdx, %rax
-; SNB-NEXT: retq
-; SNB-NEXT: .LBB1_1:
-; SNB-NEXT: xorl %edx, %edx
-; SNB-NEXT: movl %edi, %eax
-; SNB-NEXT: divl %esi
-; SNB-NEXT: # kill: %EDX %EDX %RDX
-; SNB-NEXT: movq %rdx, %rax
-; SNB-NEXT: retq
- %result = srem i64 %a, %b
- ret i64 %result
-}
-
-define i64 @Test_get_quotient_and_remainder(i64 %a, i64 %b) nounwind {
-; CHECK-LABEL: Test_get_quotient_and_remainder:
-; CHECK: # BB#0:
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
-; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: testq %rcx, %rax
-; CHECK-NEXT: je .LBB2_1
-; CHECK-NEXT: # BB#2:
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: cqto
-; CHECK-NEXT: idivq %rsi
-; CHECK-NEXT: addq %rdx, %rax
-; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB2_1:
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: divl %esi
-; CHECK-NEXT: # kill: %EAX %EAX %RAX
-; CHECK-NEXT: # kill: %EDX %EDX %RDX
-; CHECK-NEXT: addq %rdx, %rax
-; CHECK-NEXT: retq
-;
-; SNB-LABEL: Test_get_quotient_and_remainder:
-; SNB: # BB#0:
-; SNB-NEXT: movq %rdi, %rax
-; SNB-NEXT: orq %rsi, %rax
-; SNB-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
-; SNB-NEXT: testq %rcx, %rax
-; SNB-NEXT: je .LBB2_1
-; SNB-NEXT: # BB#2:
-; SNB-NEXT: movq %rdi, %rax
-; SNB-NEXT: cqto
-; SNB-NEXT: idivq %rsi
-; SNB-NEXT: addq %rdx, %rax
-; SNB-NEXT: retq
-; SNB-NEXT: .LBB2_1:
-; SNB-NEXT: xorl %edx, %edx
-; SNB-NEXT: movl %edi, %eax
-; SNB-NEXT: divl %esi
-; SNB-NEXT: # kill: %EDX %EDX %RDX
-; SNB-NEXT: # kill: %EAX %EAX %RAX
-; SNB-NEXT: addq %rdx, %rax
-; SNB-NEXT: retq
- %resultdiv = sdiv i64 %a, %b
- %resultrem = srem i64 %a, %b
- %result = add i64 %resultdiv, %resultrem
- ret i64 %result
-}
Index: llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division.ll
+++ llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division.ll
@@ -1,245 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
-
-define i32 @Test_get_quotient(i32 %a, i32 %b) nounwind {
-; CHECK-LABEL: Test_get_quotient:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl %eax, %edx
-; CHECK-NEXT: orl %ecx, %edx
-; CHECK-NEXT: testl $-256, %edx
-; CHECK-NEXT: je .LBB0_1
-; CHECK-NEXT: # BB#2:
-; CHECK-NEXT: cltd
-; CHECK-NEXT: idivl %ecx
-; CHECK-NEXT: retl
-; CHECK-NEXT: .LBB0_1:
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: # kill: %EAX %EAX %AX
-; CHECK-NEXT: divb %cl
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: retl
- %result = sdiv i32 %a, %b
- ret i32 %result
-}
-
-define i32 @Test_get_remainder(i32 %a, i32 %b) nounwind {
-; CHECK-LABEL: Test_get_remainder:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl %eax, %edx
-; CHECK-NEXT: orl %ecx, %edx
-; CHECK-NEXT: testl $-256, %edx
-; CHECK-NEXT: je .LBB1_1
-; CHECK-NEXT: # BB#2:
-; CHECK-NEXT: cltd
-; CHECK-NEXT: idivl %ecx
-; CHECK-NEXT: movl %edx, %eax
-; CHECK-NEXT: retl
-; CHECK-NEXT: .LBB1_1:
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: # kill: %EAX %EAX %AX
-; CHECK-NEXT: divb %cl
-; CHECK-NEXT: movzbl %ah, %eax # NOREX
-; CHECK-NEXT: retl
- %result = srem i32 %a, %b
- ret i32 %result
-}
-
-define i32 @Test_get_quotient_and_remainder(i32 %a, i32 %b) nounwind {
-; CHECK-LABEL: Test_get_quotient_and_remainder:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl %eax, %edx
-; CHECK-NEXT: orl %ecx, %edx
-; CHECK-NEXT: testl $-256, %edx
-; CHECK-NEXT: je .LBB2_1
-; CHECK-NEXT: # BB#2:
-; CHECK-NEXT: cltd
-; CHECK-NEXT: idivl %ecx
-; CHECK-NEXT: addl %edx, %eax
-; CHECK-NEXT: retl
-; CHECK-NEXT: .LBB2_1:
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: # kill: %EAX %EAX %AX
-; CHECK-NEXT: divb %cl
-; CHECK-NEXT: movzbl %ah, %edx # NOREX
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: addl %edx, %eax
-; CHECK-NEXT: retl
- %resultdiv = sdiv i32 %a, %b
- %resultrem = srem i32 %a, %b
- %result = add i32 %resultdiv, %resultrem
- ret i32 %result
-}
-
-define i32 @Test_use_div_and_idiv(i32 %a, i32 %b) nounwind {
-; CHECK-LABEL: Test_use_div_and_idiv:
-; CHECK: # BB#0:
-; CHECK-NEXT: pushl %ebx
-; CHECK-NEXT: pushl %edi
-; CHECK-NEXT: pushl %esi
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; CHECK-NEXT: movl %ecx, %edi
-; CHECK-NEXT: orl %ebx, %edi
-; CHECK-NEXT: testl $-256, %edi
-; CHECK-NEXT: je .LBB3_1
-; CHECK-NEXT: # BB#2:
-; CHECK-NEXT: movl %ecx, %eax
-; CHECK-NEXT: cltd
-; CHECK-NEXT: idivl %ebx
-; CHECK-NEXT: movl %eax, %esi
-; CHECK-NEXT: testl $-256, %edi
-; CHECK-NEXT: jne .LBB3_5
-; CHECK-NEXT: jmp .LBB3_4
-; CHECK-NEXT: .LBB3_1:
-; CHECK-NEXT: movzbl %cl, %eax
-; CHECK-NEXT: # kill: %EAX %EAX %AX
-; CHECK-NEXT: divb %bl
-; CHECK-NEXT: movzbl %al, %esi
-; CHECK-NEXT: testl $-256, %edi
-; CHECK-NEXT: je .LBB3_4
-; CHECK-NEXT: .LBB3_5:
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: movl %ecx, %eax
-; CHECK-NEXT: divl %ebx
-; CHECK-NEXT: jmp .LBB3_6
-; CHECK-NEXT: .LBB3_4:
-; CHECK-NEXT: movzbl %cl, %eax
-; CHECK-NEXT: # kill: %EAX %EAX %AX
-; CHECK-NEXT: divb %bl
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: .LBB3_6:
-; CHECK-NEXT: addl %eax, %esi
-; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: popl %esi
-; CHECK-NEXT: popl %edi
-; CHECK-NEXT: popl %ebx
-; CHECK-NEXT: retl
- %resultidiv = sdiv i32 %a, %b
- %resultdiv = udiv i32 %a, %b
- %result = add i32 %resultidiv, %resultdiv
- ret i32 %result
-}
-
-define i32 @Test_use_div_imm_imm() nounwind {
-; CHECK-LABEL: Test_use_div_imm_imm:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl $64, %eax
-; CHECK-NEXT: nop
-; CHECK-NEXT: nop
-; CHECK-NEXT: nop
-; CHECK-NEXT: nop
-; CHECK-NEXT: nop
-; CHECK-NEXT: nop
-; CHECK-NEXT: retl
- %resultdiv = sdiv i32 256, 4
- ret i32 %resultdiv
-}
-
-define i32 @Test_use_div_reg_imm(i32 %a) nounwind {
-; CHECK-LABEL: Test_use_div_reg_imm:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl $1041204193, %eax # imm = 0x3E0F83E1
-; CHECK-NEXT: imull {{[0-9]+}}(%esp)
-; CHECK-NEXT: movl %edx, %eax
-; CHECK-NEXT: sarl $3, %edx
-; CHECK-NEXT: shrl $31, %eax
-; CHECK-NEXT: leal (%edx,%eax), %eax
-; CHECK-NEXT: retl
- %resultdiv = sdiv i32 %a, 33
- ret i32 %resultdiv
-}
-
-define i32 @Test_use_rem_reg_imm(i32 %a) nounwind {
-; CHECK-LABEL: Test_use_rem_reg_imm:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl $1041204193, %edx # imm = 0x3E0F83E1
-; CHECK-NEXT: movl %ecx, %eax
-; CHECK-NEXT: imull %edx
-; CHECK-NEXT: movl %edx, %eax
-; CHECK-NEXT: sarl $3, %edx
-; CHECK-NEXT: shrl $31, %eax
-; CHECK-NEXT: addl %eax, %edx
-; CHECK-NEXT: movl %edx, %eax
-; CHECK-NEXT: shll $5, %eax
-; CHECK-NEXT: addl %edx, %eax
-; CHECK-NEXT: subl %eax, %ecx
-; CHECK-NEXT: movl %ecx, %eax
-; CHECK-NEXT: retl
- %resultrem = srem i32 %a, 33
- ret i32 %resultrem
-}
-
-define i32 @Test_use_divrem_reg_imm(i32 %a) nounwind {
-; CHECK-LABEL: Test_use_divrem_reg_imm:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl $1041204193, %edx # imm = 0x3E0F83E1
-; CHECK-NEXT: movl %ecx, %eax
-; CHECK-NEXT: imull %edx
-; CHECK-NEXT: movl %edx, %eax
-; CHECK-NEXT: sarl $3, %edx
-; CHECK-NEXT: shrl $31, %eax
-; CHECK-NEXT: addl %eax, %edx
-; CHECK-NEXT: movl %edx, %eax
-; CHECK-NEXT: shll $5, %eax
-; CHECK-NEXT: addl %edx, %eax
-; CHECK-NEXT: subl %eax, %ecx
-; CHECK-NEXT: addl %edx, %ecx
-; CHECK-NEXT: movl %ecx, %eax
-; CHECK-NEXT: retl
- %resultdiv = sdiv i32 %a, 33
- %resultrem = srem i32 %a, 33
- %result = add i32 %resultdiv, %resultrem
- ret i32 %result
-}
-
-define i32 @Test_use_div_imm_reg(i32 %a) nounwind {
-; CHECK-LABEL: Test_use_div_imm_reg:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: testl $-256, %ecx
-; CHECK-NEXT: je .LBB8_1
-; CHECK-NEXT: # BB#2:
-; CHECK-NEXT: movl $4, %eax
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: idivl %ecx
-; CHECK-NEXT: retl
-; CHECK-NEXT: .LBB8_1:
-; CHECK-NEXT: movb $4, %al
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: # kill: %EAX %EAX %AX
-; CHECK-NEXT: divb %cl
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: retl
- %resultdiv = sdiv i32 4, %a
- ret i32 %resultdiv
-}
-
-define i32 @Test_use_rem_imm_reg(i32 %a) nounwind {
-; CHECK-LABEL: Test_use_rem_imm_reg:
-; CHECK: # BB#0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: testl $-256, %ecx
-; CHECK-NEXT: je .LBB9_1
-; CHECK-NEXT: # BB#2:
-; CHECK-NEXT: movl $4, %eax
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: idivl %ecx
-; CHECK-NEXT: retl
-; CHECK-NEXT: .LBB9_1:
-; CHECK-NEXT: movb $4, %al
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: # kill: %EAX %EAX %AX
-; CHECK-NEXT: divb %cl
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: retl
- %resultdiv = sdiv i32 4, %a
- ret i32 %resultdiv
-}
Index: llvm/trunk/test/CodeGen/X86/bypass-slow-division-32.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/bypass-slow-division-32.ll
+++ llvm/trunk/test/CodeGen/X86/bypass-slow-division-32.ll
@@ -0,0 +1,245 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
+
+define i32 @Test_get_quotient(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: Test_get_quotient:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: orl %ecx, %edx
+; CHECK-NEXT: testl $-256, %edx
+; CHECK-NEXT: je .LBB0_1
+; CHECK-NEXT: # BB#2:
+; CHECK-NEXT: cltd
+; CHECK-NEXT: idivl %ecx
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB0_1:
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: # kill: %EAX %EAX %AX
+; CHECK-NEXT: divb %cl
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
+ %result = sdiv i32 %a, %b
+ ret i32 %result
+}
+
+define i32 @Test_get_remainder(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: Test_get_remainder:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: orl %ecx, %edx
+; CHECK-NEXT: testl $-256, %edx
+; CHECK-NEXT: je .LBB1_1
+; CHECK-NEXT: # BB#2:
+; CHECK-NEXT: cltd
+; CHECK-NEXT: idivl %ecx
+; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB1_1:
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: # kill: %EAX %EAX %AX
+; CHECK-NEXT: divb %cl
+; CHECK-NEXT: movzbl %ah, %eax # NOREX
+; CHECK-NEXT: retl
+ %result = srem i32 %a, %b
+ ret i32 %result
+}
+
+define i32 @Test_get_quotient_and_remainder(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: Test_get_quotient_and_remainder:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: orl %ecx, %edx
+; CHECK-NEXT: testl $-256, %edx
+; CHECK-NEXT: je .LBB2_1
+; CHECK-NEXT: # BB#2:
+; CHECK-NEXT: cltd
+; CHECK-NEXT: idivl %ecx
+; CHECK-NEXT: addl %edx, %eax
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB2_1:
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: # kill: %EAX %EAX %AX
+; CHECK-NEXT: divb %cl
+; CHECK-NEXT: movzbl %ah, %edx # NOREX
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: addl %edx, %eax
+; CHECK-NEXT: retl
+ %resultdiv = sdiv i32 %a, %b
+ %resultrem = srem i32 %a, %b
+ %result = add i32 %resultdiv, %resultrem
+ ret i32 %result
+}
+
+define i32 @Test_use_div_and_idiv(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: Test_use_div_and_idiv:
+; CHECK: # BB#0:
+; CHECK-NEXT: pushl %ebx
+; CHECK-NEXT: pushl %edi
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT: movl %ecx, %edi
+; CHECK-NEXT: orl %ebx, %edi
+; CHECK-NEXT: testl $-256, %edi
+; CHECK-NEXT: je .LBB3_1
+; CHECK-NEXT: # BB#2:
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: cltd
+; CHECK-NEXT: idivl %ebx
+; CHECK-NEXT: movl %eax, %esi
+; CHECK-NEXT: testl $-256, %edi
+; CHECK-NEXT: jne .LBB3_5
+; CHECK-NEXT: jmp .LBB3_4
+; CHECK-NEXT: .LBB3_1:
+; CHECK-NEXT: movzbl %cl, %eax
+; CHECK-NEXT: # kill: %EAX %EAX %AX
+; CHECK-NEXT: divb %bl
+; CHECK-NEXT: movzbl %al, %esi
+; CHECK-NEXT: testl $-256, %edi
+; CHECK-NEXT: je .LBB3_4
+; CHECK-NEXT: .LBB3_5:
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: divl %ebx
+; CHECK-NEXT: jmp .LBB3_6
+; CHECK-NEXT: .LBB3_4:
+; CHECK-NEXT: movzbl %cl, %eax
+; CHECK-NEXT: # kill: %EAX %EAX %AX
+; CHECK-NEXT: divb %bl
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: .LBB3_6:
+; CHECK-NEXT: addl %eax, %esi
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: popl %edi
+; CHECK-NEXT: popl %ebx
+; CHECK-NEXT: retl
+ %resultidiv = sdiv i32 %a, %b
+ %resultdiv = udiv i32 %a, %b
+ %result = add i32 %resultidiv, %resultdiv
+ ret i32 %result
+}
+
+define i32 @Test_use_div_imm_imm() nounwind {
+; CHECK-LABEL: Test_use_div_imm_imm:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $64, %eax
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: retl
+ %resultdiv = sdiv i32 256, 4
+ ret i32 %resultdiv
+}
+
+define i32 @Test_use_div_reg_imm(i32 %a) nounwind {
+; CHECK-LABEL: Test_use_div_reg_imm:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $1041204193, %eax # imm = 0x3E0F83E1
+; CHECK-NEXT: imull {{[0-9]+}}(%esp)
+; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: sarl $3, %edx
+; CHECK-NEXT: shrl $31, %eax
+; CHECK-NEXT: leal (%edx,%eax), %eax
+; CHECK-NEXT: retl
+ %resultdiv = sdiv i32 %a, 33
+ ret i32 %resultdiv
+}
+
+define i32 @Test_use_rem_reg_imm(i32 %a) nounwind {
+; CHECK-LABEL: Test_use_rem_reg_imm:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl $1041204193, %edx # imm = 0x3E0F83E1
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: imull %edx
+; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: sarl $3, %edx
+; CHECK-NEXT: shrl $31, %eax
+; CHECK-NEXT: addl %eax, %edx
+; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: shll $5, %eax
+; CHECK-NEXT: addl %edx, %eax
+; CHECK-NEXT: subl %eax, %ecx
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: retl
+ %resultrem = srem i32 %a, 33
+ ret i32 %resultrem
+}
+
+define i32 @Test_use_divrem_reg_imm(i32 %a) nounwind {
+; CHECK-LABEL: Test_use_divrem_reg_imm:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl $1041204193, %edx # imm = 0x3E0F83E1
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: imull %edx
+; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: sarl $3, %edx
+; CHECK-NEXT: shrl $31, %eax
+; CHECK-NEXT: addl %eax, %edx
+; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: shll $5, %eax
+; CHECK-NEXT: addl %edx, %eax
+; CHECK-NEXT: subl %eax, %ecx
+; CHECK-NEXT: addl %edx, %ecx
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: retl
+ %resultdiv = sdiv i32 %a, 33
+ %resultrem = srem i32 %a, 33
+ %result = add i32 %resultdiv, %resultrem
+ ret i32 %result
+}
+
+define i32 @Test_use_div_imm_reg(i32 %a) nounwind {
+; CHECK-LABEL: Test_use_div_imm_reg:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: testl $-256, %ecx
+; CHECK-NEXT: je .LBB8_1
+; CHECK-NEXT: # BB#2:
+; CHECK-NEXT: movl $4, %eax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: idivl %ecx
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB8_1:
+; CHECK-NEXT: movb $4, %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: # kill: %EAX %EAX %AX
+; CHECK-NEXT: divb %cl
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
+ %resultdiv = sdiv i32 4, %a
+ ret i32 %resultdiv
+}
+
+define i32 @Test_use_rem_imm_reg(i32 %a) nounwind {
+; CHECK-LABEL: Test_use_rem_imm_reg:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: testl $-256, %ecx
+; CHECK-NEXT: je .LBB9_1
+; CHECK-NEXT: # BB#2:
+; CHECK-NEXT: movl $4, %eax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: idivl %ecx
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB9_1:
+; CHECK-NEXT: movb $4, %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: # kill: %EAX %EAX %AX
+; CHECK-NEXT: divb %cl
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
+ %resultdiv = sdiv i32 4, %a
+ ret i32 %resultdiv
+}
Index: llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll
+++ llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll
@@ -0,0 +1,143 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mcpu=atom -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -mcpu=sandybridge -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=SNB
+
+; Additional tests for 64-bit divide bypass
+
+define i64 @Test_get_quotient(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: Test_get_quotient:
+; CHECK: # BB#0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
+; CHECK-NEXT: orq %rsi, %rax
+; CHECK-NEXT: testq %rcx, %rax
+; CHECK-NEXT: je .LBB0_1
+; CHECK-NEXT: # BB#2:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: cqto
+; CHECK-NEXT: idivq %rsi
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB0_1:
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: divl %esi
+; CHECK-NEXT: # kill: %EAX %EAX %RAX
+; CHECK-NEXT: retq
+;
+; SNB-LABEL: Test_get_quotient:
+; SNB: # BB#0:
+; SNB-NEXT: movq %rdi, %rax
+; SNB-NEXT: orq %rsi, %rax
+; SNB-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
+; SNB-NEXT: testq %rcx, %rax
+; SNB-NEXT: je .LBB0_1
+; SNB-NEXT: # BB#2:
+; SNB-NEXT: movq %rdi, %rax
+; SNB-NEXT: cqto
+; SNB-NEXT: idivq %rsi
+; SNB-NEXT: retq
+; SNB-NEXT: .LBB0_1:
+; SNB-NEXT: xorl %edx, %edx
+; SNB-NEXT: movl %edi, %eax
+; SNB-NEXT: divl %esi
+; SNB-NEXT: # kill: %EAX %EAX %RAX
+; SNB-NEXT: retq
+ %result = sdiv i64 %a, %b
+ ret i64 %result
+}
+
+define i64 @Test_get_remainder(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: Test_get_remainder:
+; CHECK: # BB#0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
+; CHECK-NEXT: orq %rsi, %rax
+; CHECK-NEXT: testq %rcx, %rax
+; CHECK-NEXT: je .LBB1_1
+; CHECK-NEXT: # BB#2:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: cqto
+; CHECK-NEXT: idivq %rsi
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB1_1:
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: divl %esi
+; CHECK-NEXT: # kill: %EDX %EDX %RDX
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK-NEXT: retq
+;
+; SNB-LABEL: Test_get_remainder:
+; SNB: # BB#0:
+; SNB-NEXT: movq %rdi, %rax
+; SNB-NEXT: orq %rsi, %rax
+; SNB-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
+; SNB-NEXT: testq %rcx, %rax
+; SNB-NEXT: je .LBB1_1
+; SNB-NEXT: # BB#2:
+; SNB-NEXT: movq %rdi, %rax
+; SNB-NEXT: cqto
+; SNB-NEXT: idivq %rsi
+; SNB-NEXT: movq %rdx, %rax
+; SNB-NEXT: retq
+; SNB-NEXT: .LBB1_1:
+; SNB-NEXT: xorl %edx, %edx
+; SNB-NEXT: movl %edi, %eax
+; SNB-NEXT: divl %esi
+; SNB-NEXT: # kill: %EDX %EDX %RDX
+; SNB-NEXT: movq %rdx, %rax
+; SNB-NEXT: retq
+ %result = srem i64 %a, %b
+ ret i64 %result
+}
+
+define i64 @Test_get_quotient_and_remainder(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: Test_get_quotient_and_remainder:
+; CHECK: # BB#0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
+; CHECK-NEXT: orq %rsi, %rax
+; CHECK-NEXT: testq %rcx, %rax
+; CHECK-NEXT: je .LBB2_1
+; CHECK-NEXT: # BB#2:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: cqto
+; CHECK-NEXT: idivq %rsi
+; CHECK-NEXT: addq %rdx, %rax
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB2_1:
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: divl %esi
+; CHECK-NEXT: # kill: %EAX %EAX %RAX
+; CHECK-NEXT: # kill: %EDX %EDX %RDX
+; CHECK-NEXT: addq %rdx, %rax
+; CHECK-NEXT: retq
+;
+; SNB-LABEL: Test_get_quotient_and_remainder:
+; SNB: # BB#0:
+; SNB-NEXT: movq %rdi, %rax
+; SNB-NEXT: orq %rsi, %rax
+; SNB-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
+; SNB-NEXT: testq %rcx, %rax
+; SNB-NEXT: je .LBB2_1
+; SNB-NEXT: # BB#2:
+; SNB-NEXT: movq %rdi, %rax
+; SNB-NEXT: cqto
+; SNB-NEXT: idivq %rsi
+; SNB-NEXT: addq %rdx, %rax
+; SNB-NEXT: retq
+; SNB-NEXT: .LBB2_1:
+; SNB-NEXT: xorl %edx, %edx
+; SNB-NEXT: movl %edi, %eax
+; SNB-NEXT: divl %esi
+; SNB-NEXT: # kill: %EDX %EDX %RDX
+; SNB-NEXT: # kill: %EAX %EAX %RAX
+; SNB-NEXT: addq %rdx, %rax
+; SNB-NEXT: retq
+ %resultdiv = sdiv i64 %a, %b
+ %resultrem = srem i64 %a, %b
+ %result = add i64 %resultdiv, %resultrem
+ ret i64 %result
+}
Index: llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll
+++ llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivl-to-divb < %s | FileCheck -check-prefix=DIV32 %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivq-to-divl < %s | FileCheck -check-prefix=DIV64 %s
+
+define i32 @div32(i32 %a, i32 %b) {
+entry:
+; DIV32-LABEL: div32:
+; DIV32: orl %{{.*}}, [[REG:%[a-z]+]]
+; DIV32: testl $-256, [[REG]]
+; DIV32: divb
+; DIV64-LABEL: div32:
+; DIV64-NOT: divb
+ %div = sdiv i32 %a, %b
+ ret i32 %div
+}
+
+define i64 @div64(i64 %a, i64 %b) {
+entry:
+; DIV32-LABEL: div64:
+; DIV32-NOT: divl
+; DIV64-LABEL: div64:
+; DIV64-DAG: movabsq $-4294967296, [[REGMSK:%[a-z]+]]
+; DIV64-DAG: orq %{{.*}}, [[REG:%[a-z]+]]
+; DIV64: testq [[REGMSK]], [[REG]]
+; DIV64: divl
+ %div = sdiv i64 %a, %b
+ ret i64 %div
+}
+
+; Verify that no extra code is generated when optimizing for size.
+
+define i32 @div32_optsize(i32 %a, i32 %b) optsize {
+; DIV32-LABEL: div32_optsize:
+; DIV32-NOT: divb
+ %div = sdiv i32 %a, %b
+ ret i32 %div
+}
+
+define i32 @div32_minsize(i32 %a, i32 %b) minsize {
+; DIV32-LABEL: div32_minsize:
+; DIV32-NOT: divb
+ %div = sdiv i32 %a, %b
+ ret i32 %div
+}
+
Index: llvm/trunk/test/CodeGen/X86/slow-div.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/slow-div.ll
+++ llvm/trunk/test/CodeGen/X86/slow-div.ll
@@ -1,44 +0,0 @@
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivl-to-divb < %s | FileCheck -check-prefix=DIV32 %s
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivq-to-divl < %s | FileCheck -check-prefix=DIV64 %s
-
-define i32 @div32(i32 %a, i32 %b) {
-entry:
-; DIV32-LABEL: div32:
-; DIV32: orl %{{.*}}, [[REG:%[a-z]+]]
-; DIV32: testl $-256, [[REG]]
-; DIV32: divb
-; DIV64-LABEL: div32:
-; DIV64-NOT: divb
- %div = sdiv i32 %a, %b
- ret i32 %div
-}
-
-define i64 @div64(i64 %a, i64 %b) {
-entry:
-; DIV32-LABEL: div64:
-; DIV32-NOT: divl
-; DIV64-LABEL: div64:
-; DIV64-DAG: movabsq $-4294967296, [[REGMSK:%[a-z]+]]
-; DIV64-DAG: orq %{{.*}}, [[REG:%[a-z]+]]
-; DIV64: testq [[REGMSK]], [[REG]]
-; DIV64: divl
- %div = sdiv i64 %a, %b
- ret i64 %div
-}
-
-; Verify that no extra code is generated when optimizing for size.
-
-define i32 @div32_optsize(i32 %a, i32 %b) optsize {
-; DIV32-LABEL: div32_optsize:
-; DIV32-NOT: divb
- %div = sdiv i32 %a, %b
- ret i32 %div
-}
-
-define i32 @div32_minsize(i32 %a, i32 %b) minsize {
-; DIV32-LABEL: div32_minsize:
-; DIV32-NOT: divb
- %div = sdiv i32 %a, %b
- ret i32 %div
-}
-