Index: llvm/trunk/lib/Target/X86/X86.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86.td
+++ llvm/trunk/lib/Target/X86/X86.td
@@ -209,9 +209,9 @@
 def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
                                      "HasSlowDivide32", "true",
                                      "Use 8-bit divide for positive values less than 256">;
-def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw",
+def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
                                      "HasSlowDivide64", "true",
-                                     "Use 16-bit divide for positive values less than 65536">;
+                                     "Use 32-bit divide for positive values less than 2^32">;
 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
                                     "PadShortFunctions", "true",
                                     "Pad short functions">;
@@ -461,6 +461,7 @@
   FeatureCMPXCHG16B,
   FeaturePOPCNT,
   FeatureAES,
+  FeatureSlowDivide64,
   FeaturePCLMUL,
   FeatureXSAVE,
   FeatureXSAVEOPT,
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -97,12 +97,12 @@
   const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
 
-  // Bypass expensive divides on Atom when compiling with O2.
+  // Bypass expensive divides and use cheaper ones.
   if (TM.getOptLevel() >= CodeGenOpt::Default) {
     if (Subtarget.hasSlowDivide32())
       addBypassSlowDiv(32, 8);
     if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
-      addBypassSlowDiv(64, 16);
+      addBypassSlowDiv(64, 32);
   }
 
   if (Subtarget.isTargetKnownWindowsMSVC() ||
Index: llvm/trunk/lib/Target/X86/X86Subtarget.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h
@@ -216,7 +216,7 @@
   /// 32-bit divisions and should be used when possible.
   bool HasSlowDivide32;
 
-  /// True if 16-bit divides are significantly faster than
+  /// True if 32-bit divides are significantly faster than
   /// 64-bit divisions and should be used when possible.
   bool HasSlowDivide64;
 
Index: llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division-64.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division-64.ll
+++ llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division-64.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mcpu=atom -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -mcpu=sandybridge -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=SNB
 
 ; Additional tests for 64-bit divide bypass
 
@@ -7,8 +8,9 @@
 ; CHECK-LABEL: Test_get_quotient:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
 ; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    testq $-65536, %rax # imm = 0xFFFF0000
+; CHECK-NEXT:    testq %rcx, %rax
 ; CHECK-NEXT:    je .LBB0_1
 ; CHECK-NEXT:  # BB#2:
 ; CHECK-NEXT:    movq %rdi, %rax
@@ -18,9 +20,28 @@
 ; CHECK-NEXT:  .LBB0_1:
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    divw %si
-; CHECK-NEXT:    movzwl %ax, %eax
+; CHECK-NEXT:    divl %esi
+; CHECK-NEXT:    # kill: %EAX %EAX %RAX
 ; CHECK-NEXT:    retq
+;
+; SNB-LABEL: Test_get_quotient:
+; SNB:       # BB#0:
+; SNB-NEXT:    movq %rdi, %rax
+; SNB-NEXT:    orq %rsi, %rax
+; SNB-NEXT:    movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
+; SNB-NEXT:    testq %rcx, %rax
+; SNB-NEXT:    je .LBB0_1
+; SNB-NEXT:  # BB#2:
+; SNB-NEXT:    movq %rdi, %rax
+; SNB-NEXT:    cqto
+; SNB-NEXT:    idivq %rsi
+; SNB-NEXT:    retq
+; SNB-NEXT:  .LBB0_1:
+; SNB-NEXT:    xorl %edx, %edx
+; SNB-NEXT:    movl %edi, %eax
+; SNB-NEXT:    divl %esi
+; SNB-NEXT:    # kill: %EAX %EAX %RAX
+; SNB-NEXT:    retq
   %result = sdiv i64 %a, %b
   ret i64 %result
 }
@@ -29,8 +50,9 @@
 ; CHECK-LABEL: Test_get_remainder:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
 ; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    testq $-65536, %rax # imm = 0xFFFF0000
+; CHECK-NEXT:    testq %rcx, %rax
 ; CHECK-NEXT:    je .LBB1_1
 ; CHECK-NEXT:  # BB#2:
 ; CHECK-NEXT:    movq %rdi, %rax
@@ -41,9 +63,31 @@
 ; CHECK-NEXT:  .LBB1_1:
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    divw %si
-; CHECK-NEXT:    movzwl %dx, %eax
+; CHECK-NEXT:    divl %esi
+; CHECK-NEXT:    # kill: %EDX %EDX %RDX
+; CHECK-NEXT:    movq %rdx, %rax
 ; CHECK-NEXT:    retq
+;
+; SNB-LABEL: Test_get_remainder:
+; SNB:       # BB#0:
+; SNB-NEXT:    movq %rdi, %rax
+; SNB-NEXT:    orq %rsi, %rax
+; SNB-NEXT:    movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
+; SNB-NEXT:    testq %rcx, %rax
+; SNB-NEXT:    je .LBB1_1
+; SNB-NEXT:  # BB#2:
+; SNB-NEXT:    movq %rdi, %rax
+; SNB-NEXT:    cqto
+; SNB-NEXT:    idivq %rsi
+; SNB-NEXT:    movq %rdx, %rax
+; SNB-NEXT:    retq
+; SNB-NEXT:  .LBB1_1:
+; SNB-NEXT:    xorl %edx, %edx
+; SNB-NEXT:    movl %edi, %eax
+; SNB-NEXT:    divl %esi
+; SNB-NEXT:    # kill: %EDX %EDX %RDX
+; SNB-NEXT:    movq %rdx, %rax
+; SNB-NEXT:    retq
   %result = srem i64 %a, %b
   ret i64 %result
 }
@@ -52,8 +96,9 @@
 ; CHECK-LABEL: Test_get_quotient_and_remainder:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
 ; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    testq $-65536, %rax # imm = 0xFFFF0000
+; CHECK-NEXT:    testq %rcx, %rax
 ; CHECK-NEXT:    je .LBB2_1
 ; CHECK-NEXT:  # BB#2:
 ; CHECK-NEXT:    movq %rdi, %rax
@@ -64,11 +109,33 @@
 ; CHECK-NEXT:  .LBB2_1:
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    divw %si
-; CHECK-NEXT:    movzwl %ax, %eax
-; CHECK-NEXT:    movzwl %dx, %edx
+; CHECK-NEXT:    divl %esi
+; CHECK-NEXT:    # kill: %EAX %EAX %RAX
+; CHECK-NEXT:    # kill: %EDX %EDX %RDX
 ; CHECK-NEXT:    addq %rdx, %rax
 ; CHECK-NEXT:    retq
+;
+; SNB-LABEL: Test_get_quotient_and_remainder:
+; SNB:       # BB#0:
+; SNB-NEXT:    movq %rdi, %rax
+; SNB-NEXT:    orq %rsi, %rax
+; SNB-NEXT:    movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
+; SNB-NEXT:    testq %rcx, %rax
+; SNB-NEXT:    je .LBB2_1
+; SNB-NEXT:  # BB#2:
+; SNB-NEXT:    movq %rdi, %rax
+; SNB-NEXT:    cqto
+; SNB-NEXT:    idivq %rsi
+; SNB-NEXT:    addq %rdx, %rax
+; SNB-NEXT:    retq
+; SNB-NEXT:  .LBB2_1:
+; SNB-NEXT:    xorl %edx, %edx
+; SNB-NEXT:    movl %edi, %eax
+; SNB-NEXT:    divl %esi
+; SNB-NEXT:    # kill: %EDX %EDX %RDX
+; SNB-NEXT:    # kill: %EAX %EAX %RAX
+; SNB-NEXT:    addq %rdx, %rax
+; SNB-NEXT:    retq
   %resultdiv = sdiv i64 %a, %b
   %resultrem = srem i64 %a, %b
   %result = add i64 %resultdiv, %resultrem
Index: llvm/trunk/test/CodeGen/X86/slow-div.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/slow-div.ll
+++ llvm/trunk/test/CodeGen/X86/slow-div.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivl-to-divb < %s | FileCheck -check-prefix=DIV32 %s
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivq-to-divw < %s | FileCheck -check-prefix=DIV64 %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivq-to-divl < %s | FileCheck -check-prefix=DIV64 %s
 
 define i32 @div32(i32 %a, i32 %b) {
 entry:
@@ -16,11 +16,12 @@
 define i64 @div64(i64 %a, i64 %b) {
 entry:
 ; DIV32-LABEL: div64:
-; DIV32-NOT: divw
+; DIV32-NOT: divl
 ; DIV64-LABEL: div64:
-; DIV64: orq %{{.*}}, [[REG:%[a-z]+]]
-; DIV64: testq $-65536, [[REG]]
-; DIV64: divw
+; DIV64-DAG: movabsq $-4294967296, [[REGMSK:%[a-z]+]]
+; DIV64-DAG: orq %{{.*}}, [[REG:%[a-z]+]]
+; DIV64: testq [[REGMSK]], [[REG]]
+; DIV64: divl
   %div = sdiv i64 %a, %b
   ret i64 %div
 }
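For reference, the IR-level rewrite requested by addBypassSlowDiv(64, 32) is done by the BypassSlowDivision utility (invoked from CodeGenPrepare). The C sketch below is a hypothetical illustration of the emitted control flow, not LLVM source; the helper name sdiv64_bypass is invented for this note. When the OR of the two operands has no bits set above the low 32 (the testq against the 0xFFFFFFFF00000000 mask in the checks above), both values are nonnegative and less than 2^32, so an unsigned 32-bit divide (divl) yields the same quotient and remainder as the full signed 64-bit idivq.

#include <stdint.h>

/* Hypothetical helper mirroring the bypass logic; not LLVM code. */
static int64_t sdiv64_bypass(int64_t a, int64_t b) {
  /* Same guard as the generated code: orq %rsi, %rax; testq MASK, %rax. */
  if ((((uint64_t)a | (uint64_t)b) & 0xFFFFFFFF00000000ULL) == 0) {
    /* Fast path: both operands are nonnegative and fit in 32 bits, so
     * the signed 64-bit quotient equals the unsigned 32-bit one (divl). */
    return (uint32_t)a / (uint32_t)b;
  }
  /* Slow path: full-width signed divide (cqto + idivq). */
  return a / b;
}

The same guard serves srem, and a matched sdiv/srem pair shares a single compare, which is why Test_get_quotient_and_remainder above still contains only one testq. Sandy Bridge is opted into FeatureSlowDivide64 because, per the updated X86Subtarget.h comment, its 32-bit divides are significantly faster than its 64-bit ones.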