diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1481,6 +1481,11 @@
 
     /// Reassociate floating point divisions into multiply by reciprocal.
     unsigned combineRepeatedFPDivisors() const override;
+
+    /// Build a compare/add/select/shift sequence for signed division by a
+    /// power of two or its negation.
+    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+                          SmallVectorImpl<SDNode *> &Created) const override;
   };
 
   namespace X86 {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20096,6 +20096,59 @@
   return 2;
 }
 
+/// Lower (sdiv X, +/-2^k) without a divide instruction.
+///
+/// Builds sra(select(X < 0, X + (2^k - 1), X), k) and, for a negative
+/// \p Divisor, negates it. Every intermediate node is appended to \p Created.
+/// Returns N itself when isIntDivCheap() reports that division is cheap, and
+/// an empty SDValue when CMOV is unavailable, the type is neither i32 nor
+/// (on 64-bit targets) i64, or the divisor is not +/-2^k or is the minimum
+/// signed value.
+SDValue
+X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+                                 SelectionDAG &DAG,
+                                 SmallVectorImpl<SDNode *> &Created) const {
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+  if (isIntDivCheap(N->getValueType(0), Attr))
+    return SDValue(N, 0); // Lower SDIV as SDIV
+
+  // fold (sdiv X, pow2)
+  EVT VT = N->getValueType(0);
+  if (!Subtarget.hasCMov() ||
+      (VT != MVT::i32 && !(Subtarget.is64Bit() && VT == MVT::i64)) ||
+      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()) ||
+      Divisor.isMinSignedValue())
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  unsigned Lg2 = Divisor.countTrailingZeros();
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
+
+  // Bias a negative dividend by Pow2 - 1 so the shift rounds toward zero.
+  SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT);
+  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+  Created.push_back(Cmp.getNode());
+  Created.push_back(Add.getNode());
+  Created.push_back(CMov.getNode());
+
+  // Divide by pow2. The shift amount is an i8 constant, X86's scalar shift
+  // amount type; i64 is not a legal type on 32-bit targets.
+  SDValue SRA =
+      DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i8));
+
+  // If we're dividing by a positive value, we're done. Otherwise, we must
+  // negate the result.
+  if (Divisor.isNonNegative())
+    return SRA;
+
+  Created.push_back(SRA.getNode());
+  return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
+}
+
 /// Result of 'and' is compared against zero. Change to a BT node if possible.
 /// Returns the BT node and the condition code needed to use it.
 static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
diff --git a/llvm/test/CodeGen/X86/pr32588.ll b/llvm/test/CodeGen/X86/pr32588.ll
--- a/llvm/test/CodeGen/X86/pr32588.ll
+++ b/llvm/test/CodeGen/X86/pr32588.ll
@@ -8,9 +8,17 @@
 define void @fn1() {
 ; CHECK-LABEL: fn1:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpl $0, {{.*}}(%rip)
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    cmpl $0, {{.*}}(%rip)
+; CHECK-NEXT:    setne %cl
+; CHECK-NEXT:    andb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    negl %eax
+; CHECK-NEXT:    cmovsl %ecx, %eax
 ; CHECK-NEXT:    cmpl $1, {{.*}}(%rip)
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    adcl $0, %eax
 ; CHECK-NEXT:    movl %eax, {{.*}}(%rip)
 ; CHECK-NEXT:    retq
   %t0 = load i32, i32* @c, align 4
diff --git a/llvm/test/CodeGen/X86/rem.ll b/llvm/test/CodeGen/X86/rem.ll
--- a/llvm/test/CodeGen/X86/rem.ll
+++ b/llvm/test/CodeGen/X86/rem.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i386-unknown-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=cmov | FileCheck %s
 
 define i32 @test1(i32 %X) {
 ; CHECK-LABEL: test1:
@@ -27,10 +27,9 @@
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    sarl $31, %ecx
-; CHECK-NEXT:    shrl $24, %ecx
-; CHECK-NEXT:    addl %eax, %ecx
+; CHECK-NEXT:    leal 255(%eax), %ecx
+; CHECK-NEXT:    testl %eax, %eax
+; CHECK-NEXT:    cmovnsl %eax, %ecx
 ; CHECK-NEXT:    andl $-256, %ecx
 ; CHECK-NEXT:    subl %ecx, %eax
 ; CHECK-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/srem-seteq.ll b/llvm/test/CodeGen/X86/srem-seteq.ll
--- a/llvm/test/CodeGen/X86/srem-seteq.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=cmov < %s | FileCheck %s --check-prefixes=CHECK,X86
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X64
 
 ;------------------------------------------------------------------------------;
@@ -318,10 +318,9 @@
 ; X86-LABEL: test_srem_pow2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    sarl $31, %edx
-; X86-NEXT:    shrl $28, %edx
-; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    leal 15(%ecx), %edx
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    cmovnsl %ecx, %edx
 ; X86-NEXT:    andl $-16, %edx
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    cmpl %edx, %ecx
@@ -330,10 +329,10 @@
 ;
 ; X64-LABEL: test_srem_pow2:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %ecx
-; X64-NEXT:    sarl $31, %ecx
-; X64-NEXT:    shrl $28, %ecx
-; X64-NEXT:    addl %edi, %ecx
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal 15(%rdi), %ecx
+; X64-NEXT:    testl %edi, %edi
+; X64-NEXT:    cmovnsl %edi, %ecx
 ; X64-NEXT:    andl $-16, %ecx
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpl %ecx, %edi
diff --git a/llvm/test/CodeGen/X86/twoaddr-coalesce-3.ll b/llvm/test/CodeGen/X86/twoaddr-coalesce-3.ll
--- a/llvm/test/CodeGen/X86/twoaddr-coalesce-3.ll
+++ b/llvm/test/CodeGen/X86/twoaddr-coalesce-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-- -relocation-model=pic | FileCheck %s
+; RUN: llc < %s -mtriple=i386-- -relocation-model=pic | FileCheck %s
 ; This test is to ensure the TwoAddrInstruction pass chooses the proper operands to
 ; merge and generates fewer mov insns.
 