Index: llvm/trunk/test/CodeGen/X86/bypass-slow-division-32.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/bypass-slow-division-32.ll +++ llvm/trunk/test/CodeGen/X86/bypass-slow-division-32.ll @@ -1,11 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s +; Check that 32-bit division is bypassed correctly. +; RUN: llc < %s -mattr=+idivl-to-divb -mtriple=i686-linux | FileCheck %s define i32 @Test_get_quotient(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: Test_get_quotient: ; CHECK: # BB#0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, %edx ; CHECK-NEXT: orl %ecx, %edx ; CHECK-NEXT: testl $-256, %edx @@ -27,8 +28,8 @@ define i32 @Test_get_remainder(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: Test_get_remainder: ; CHECK: # BB#0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, %edx ; CHECK-NEXT: orl %ecx, %edx ; CHECK-NEXT: testl $-256, %edx @@ -51,8 +52,8 @@ define i32 @Test_get_quotient_and_remainder(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: Test_get_quotient_and_remainder: ; CHECK: # BB#0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, %edx ; CHECK-NEXT: orl %ecx, %edx ; CHECK-NEXT: testl $-256, %edx @@ -82,8 +83,8 @@ ; CHECK-NEXT: pushl %ebx ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: movl %ecx, %edi ; CHECK-NEXT: orl %ebx, %edi ; CHECK-NEXT: testl $-256, %edi @@ -130,12 +131,6 @@ ; CHECK-LABEL: Test_use_div_imm_imm: ; CHECK: # BB#0: ; CHECK-NEXT: movl $64, %eax -; CHECK-NEXT: nop -; CHECK-NEXT: nop -; CHECK-NEXT: nop -; CHECK-NEXT: nop -; CHECK-NEXT: nop -; CHECK-NEXT: nop ; CHECK-NEXT: retl %resultdiv = sdiv i32 256, 4 ret i32 %resultdiv @@ -147,8 +142,8 @@ ; CHECK-NEXT: movl $1041204193, %eax # imm = 0x3E0F83E1 ; CHECK-NEXT: imull {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: sarl $3, %edx ; CHECK-NEXT: shrl $31, %eax +; CHECK-NEXT: sarl $3, %edx ; CHECK-NEXT: leal (%edx,%eax), %eax ; CHECK-NEXT: retl %resultdiv = sdiv i32 %a, 33 @@ -163,8 +158,8 @@ ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: imull %edx ; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: sarl $3, %edx ; CHECK-NEXT: shrl $31, %eax +; CHECK-NEXT: sarl $3, %edx ; CHECK-NEXT: addl %eax, %edx ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: shll $5, %eax @@ -184,8 +179,8 @@ ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: imull %edx ; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: sarl $3, %edx ; CHECK-NEXT: shrl $31, %eax +; CHECK-NEXT: sarl $3, %edx ; CHECK-NEXT: addl %eax, %edx ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: shll $5, %eax Index: llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll +++ llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mcpu=atom -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -; RUN: llc < %s -mcpu=sandybridge -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=SNB +; Check that 64-bit division is bypassed correctly. +; RUN: llc < %s -mattr=+idivq-to-divl -mtriple=x86_64-unknown-linux-gnu | FileCheck %s ; Additional tests for 64-bit divide bypass @@ -8,8 +8,8 @@ ; CHECK-LABEL: Test_get_quotient: ; CHECK: # BB#0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 ; CHECK-NEXT: orq %rsi, %rax +; CHECK-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 ; CHECK-NEXT: testq %rcx, %rax ; CHECK-NEXT: je .LBB0_1 ; CHECK-NEXT: # BB#2: @@ -23,25 +23,6 @@ ; CHECK-NEXT: divl %esi ; CHECK-NEXT: # kill: %EAX %EAX %RAX ; CHECK-NEXT: retq -; -; SNB-LABEL: Test_get_quotient: -; SNB: # BB#0: -; SNB-NEXT: movq %rdi, %rax -; SNB-NEXT: orq %rsi, %rax -; SNB-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 -; SNB-NEXT: testq %rcx, %rax -; SNB-NEXT: je .LBB0_1 -; SNB-NEXT: # BB#2: -; SNB-NEXT: movq %rdi, %rax -; SNB-NEXT: cqto -; SNB-NEXT: idivq %rsi -; SNB-NEXT: retq -; SNB-NEXT: .LBB0_1: -; SNB-NEXT: xorl %edx, %edx -; SNB-NEXT: movl %edi, %eax -; SNB-NEXT: divl %esi -; SNB-NEXT: # kill: %EAX %EAX %RAX -; SNB-NEXT: retq %result = sdiv i64 %a, %b ret i64 %result } @@ -50,8 +31,8 @@ ; CHECK-LABEL: Test_get_remainder: ; CHECK: # BB#0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 ; CHECK-NEXT: orq %rsi, %rax +; CHECK-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 ; CHECK-NEXT: testq %rcx, %rax ; CHECK-NEXT: je .LBB1_1 ; CHECK-NEXT: # BB#2: @@ -67,27 +48,6 @@ ; CHECK-NEXT: # kill: %EDX %EDX %RDX ; CHECK-NEXT: movq %rdx, %rax ; CHECK-NEXT: retq -; -; SNB-LABEL: Test_get_remainder: -; SNB: # BB#0: -; SNB-NEXT: movq %rdi, %rax -; SNB-NEXT: orq %rsi, %rax -; SNB-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 -; SNB-NEXT: testq %rcx, %rax -; SNB-NEXT: je .LBB1_1 -; SNB-NEXT: # BB#2: -; SNB-NEXT: movq %rdi, %rax -; SNB-NEXT: cqto -; SNB-NEXT: idivq %rsi -; SNB-NEXT: movq %rdx, %rax -; SNB-NEXT: retq -; SNB-NEXT: .LBB1_1: -; SNB-NEXT: xorl %edx, %edx -; SNB-NEXT: movl %edi, %eax -; SNB-NEXT: divl %esi -; SNB-NEXT: # kill: %EDX %EDX %RDX -; SNB-NEXT: movq %rdx, %rax -; SNB-NEXT: retq %result = srem i64 %a, %b ret i64 %result } @@ -96,8 +56,8 @@ ; CHECK-LABEL: Test_get_quotient_and_remainder: ; CHECK: # BB#0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 ; CHECK-NEXT: orq %rsi, %rax +; CHECK-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 ; CHECK-NEXT: testq %rcx, %rax ; CHECK-NEXT: je .LBB2_1 ; CHECK-NEXT: # BB#2: @@ -110,32 +70,10 @@ ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: divl %esi -; CHECK-NEXT: # kill: %EAX %EAX %RAX ; CHECK-NEXT: # kill: %EDX %EDX %RDX +; CHECK-NEXT: # kill: %EAX %EAX %RAX ; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: retq -; -; SNB-LABEL: Test_get_quotient_and_remainder: -; SNB: # BB#0: -; SNB-NEXT: movq %rdi, %rax -; SNB-NEXT: orq %rsi, %rax -; SNB-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 -; SNB-NEXT: testq %rcx, %rax -; SNB-NEXT: je .LBB2_1 -; SNB-NEXT: # BB#2: -; SNB-NEXT: movq %rdi, %rax -; SNB-NEXT: cqto -; SNB-NEXT: idivq %rsi -; SNB-NEXT: addq %rdx, %rax -; SNB-NEXT: retq -; SNB-NEXT: .LBB2_1: -; SNB-NEXT: xorl %edx, %edx -; SNB-NEXT: movl %edi, %eax -; SNB-NEXT: divl %esi -; SNB-NEXT: # kill: %EDX %EDX %RDX -; SNB-NEXT: # kill: %EAX %EAX %RAX -; SNB-NEXT: addq %rdx, %rax -; SNB-NEXT: retq %resultdiv = sdiv i64 %a, %b %resultrem = srem i64 %a, %b %result = add i64 %resultdiv, %resultrem Index: llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll +++ llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll @@ -1,44 +1,56 @@ -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivl-to-divb < %s | FileCheck -check-prefix=DIV32 %s -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivq-to-divl < %s | FileCheck -check-prefix=DIV64 %s +; Check that a division is bypassed when appropriate only. +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=atom < %s | FileCheck -check-prefixes=ATOM,CHECK %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=silvermont < %s | FileCheck -check-prefixes=REST,CHECK %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck -check-prefixes=REST,CHECK %s +; Verify that div32 is bypassed only for Atoms. define i32 @div32(i32 %a, i32 %b) { entry: -; DIV32-LABEL: div32: -; DIV32: orl %{{.*}}, [[REG:%[a-z]+]] -; DIV32: testl $-256, [[REG]] -; DIV32: divb -; DIV64-LABEL: div32: -; DIV64-NOT: divb +; ATOM-LABEL: div32: +; ATOM: orl %{{.*}}, [[REG:%[a-z]+]] +; ATOM: testl $-256, [[REG]] +; ATOM: divb +; +; REST-LABEL: div32: +; REST-NOT: divb +; %div = sdiv i32 %a, %b ret i32 %div } +; Verify that div64 is always bypassed. define i64 @div64(i64 %a, i64 %b) { entry: -; DIV32-LABEL: div64: -; DIV32-NOT: divl -; DIV64-LABEL: div64: -; DIV64-DAG: movabsq $-4294967296, [[REGMSK:%[a-z]+]] -; DIV64-DAG: orq %{{.*}}, [[REG:%[a-z]+]] -; DIV64: testq [[REGMSK]], [[REG]] -; DIV64: divl +; CHECK-LABEL: div64: +; CHECK-DAG: movabsq $-4294967296, [[REGMSK:%[a-z]+]] +; CHECK-DAG: orq %{{.*}}, [[REG:%[a-z]+]] +; CHECK: testq [[REGMSK]], [[REG]] +; CHECK: divl +; %div = sdiv i64 %a, %b ret i64 %div } + ; Verify that no extra code is generated when optimizing for size. +define i64 @div64_optsize(i64 %a, i64 %b) optsize { +; CHECK-LABEL: div64_optsize: +; CHECK-NOT: divl + %div = sdiv i64 %a, %b + ret i64 %div +} + define i32 @div32_optsize(i32 %a, i32 %b) optsize { -; DIV32-LABEL: div32_optsize: -; DIV32-NOT: divb +; CHECK-LABEL: div32_optsize: +; CHECK-NOT: divb %div = sdiv i32 %a, %b ret i32 %div } define i32 @div32_minsize(i32 %a, i32 %b) minsize { -; DIV32-LABEL: div32_minsize: -; DIV32-NOT: divb +; CHECK-LABEL: div32_minsize: +; CHECK-NOT: divb %div = sdiv i32 %a, %b ret i32 %div } -