Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h
@@ -1478,6 +1478,12 @@
 
     /// Reassociate floating point divisions into multiply by reciprocal.
     unsigned combineRepeatedFPDivisors() const override;
+
+    /// Build a CMOV-based expansion of the signed division \p N by \p Divisor,
+    /// a power of two or its negation. Nodes created along the way are
+    /// appended to \p Created.
+    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+                          SmallVectorImpl<SDNode *> &Created) const override;
   };
 
   namespace X86 {
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -20080,6 +20080,70 @@
   return 2;
 }
 
+/// Expand (sdiv X, C), where C is a power of two or a negated power of two,
+/// into a CMOV-based sequence:
+///   (sra (select (setlt X, 0), (add X, |C| - 1), X), log2(|C|))
+/// negated afterwards when \p Divisor is negative.
+///
+/// Returns SDValue(N, 0) to keep the SDIV when integer division is cheap, and
+/// an empty SDValue when this expansion is not applied (no CMOV, a type other
+/// than i32 or 64-bit-mode i64, or a divisor of +/-2). Every node built here
+/// except the returned one is appended to \p Created.
+SDValue
+X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+                                 SelectionDAG &DAG,
+                                 SmallVectorImpl<SDNode *> &Created) const {
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+  if (isIntDivCheap(N->getValueType(0), Attr))
+    return SDValue(N, 0); // Lower SDIV as SDIV
+
+  assert((Divisor.isPowerOf2() || (-Divisor).isPowerOf2()) &&
+         "Unexpected divisor!");
+
+  // Only perform this transform if CMOV is supported otherwise the select
+  // below will become a branch.
+  if (!Subtarget.hasCMov())
+    return SDValue();
+
+  // fold (sdiv X, pow2)
+  EVT VT = N->getValueType(0);
+  // FIXME: Support i8/i16.
+  if (VT != MVT::i32 && !(Subtarget.is64Bit() && VT == MVT::i64))
+    return SDValue();
+
+  unsigned Lg2 = Divisor.countTrailingZeros();
+
+  // If the divisor is 2 or -2, the default expansion is better.
+  if (Lg2 == 1)
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
+
+  // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
+  SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT);
+  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+  Created.push_back(Cmp.getNode());
+  Created.push_back(Add.getNode());
+  Created.push_back(CMov.getNode());
+
+  // Divide by pow2. X86 scalar shift amounts are i8.
+  SDValue SRA =
+      DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i8));
+
+  // If we're dividing by a positive value, we're done. Otherwise, we must
+  // negate the result.
+  if (Divisor.isNonNegative())
+    return SRA;
+
+  Created.push_back(SRA.getNode());
+  return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
+}
+
 /// Result of 'and' is compared against zero. Change to a BT node if possible.
 /// Returns the BT node and the condition code needed to use it.
 static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
Index: llvm/trunk/test/CodeGen/X86/combine-sdiv.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/combine-sdiv.ll
+++ llvm/trunk/test/CodeGen/X86/combine-sdiv.ll
@@ -3200,10 +3200,9 @@
 ; CHECK-LABEL: combine_i32_sdiv_pow2:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: sarl $31, %eax
-; CHECK-NEXT: shrl $28, %eax
-; CHECK-NEXT: addl %edi, %eax
+; CHECK-NEXT: leal 15(%rdi), %eax
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: cmovnsl %edi, %eax
 ; CHECK-NEXT: sarl $4, %eax
 ; CHECK-NEXT: retq
   %1 = sdiv i32 %x, 16
@@ -3214,10 +3213,9 @@
 ; CHECK-LABEL: combine_i32_sdiv_negpow2:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: sarl $31, %eax
-; CHECK-NEXT: shrl $24, %eax
-; CHECK-NEXT: addl %edi, %eax
+; CHECK-NEXT: leal 255(%rdi), %eax
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: cmovnsl %edi, %eax
 ; CHECK-NEXT: sarl $8, %eax
 ; CHECK-NEXT: negl %eax
 ; CHECK-NEXT: retq
@@ -3228,10 +3226,9 @@
 define i64 @combine_i64_sdiv_pow2(i64 %x) {
 ;
CHECK-LABEL: combine_i64_sdiv_pow2: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: sarq $63, %rax -; CHECK-NEXT: shrq $60, %rax -; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: leaq 15(%rdi), %rax +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: cmovnsq %rdi, %rax ; CHECK-NEXT: sarq $4, %rax ; CHECK-NEXT: retq %1 = sdiv i64 %x, 16 @@ -3241,10 +3238,9 @@ define i64 @combine_i64_sdiv_negpow2(i64 %x) { ; CHECK-LABEL: combine_i64_sdiv_negpow2: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: sarq $63, %rax -; CHECK-NEXT: shrq $56, %rax -; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: leaq 255(%rdi), %rax +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: cmovnsq %rdi, %rax ; CHECK-NEXT: sarq $8, %rax ; CHECK-NEXT: negq %rax ; CHECK-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/combine-srem.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/combine-srem.ll +++ llvm/trunk/test/CodeGen/X86/combine-srem.ll @@ -56,10 +56,9 @@ ; CHECK-LABEL: combine_srem_by_minsigned: ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: sarl $31, %eax -; CHECK-NEXT: shrl %eax -; CHECK-NEXT: addl %edi, %eax +; CHECK-NEXT: leal 2147483647(%rdi), %eax +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: cmovnsl %edi, %eax ; CHECK-NEXT: andl $-2147483648, %eax # imm = 0x80000000 ; CHECK-NEXT: addl %edi, %eax ; CHECK-NEXT: retq @@ -513,12 +512,12 @@ ; CHECK-LABEL: combine_srem_pow2: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: sarl $31, %ecx -; CHECK-NEXT: shrl $28, %ecx -; CHECK-NEXT: addl %edi, %ecx +; CHECK-NEXT: leal 15(%rax), %ecx +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: cmovnsl %edi, %ecx ; CHECK-NEXT: andl $-16, %ecx ; CHECK-NEXT: subl %ecx, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: retq %1 = srem i32 %x, 16 ret i32 %1 @@ -528,12 +527,12 @@ ; CHECK-LABEL: combine_srem_negpow2: 
; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: sarl $31, %ecx -; CHECK-NEXT: shrl $24, %ecx -; CHECK-NEXT: addl %edi, %ecx +; CHECK-NEXT: leal 255(%rax), %ecx +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: cmovnsl %edi, %ecx ; CHECK-NEXT: andl $-256, %ecx ; CHECK-NEXT: subl %ecx, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: retq %1 = srem i32 %x, -256 ret i32 %1 @@ -543,10 +542,9 @@ ; CHECK-LABEL: combine_i64_srem_pow2: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: movq %rdi, %rcx -; CHECK-NEXT: sarq $63, %rcx -; CHECK-NEXT: shrq $60, %rcx -; CHECK-NEXT: addq %rdi, %rcx +; CHECK-NEXT: leaq 15(%rdi), %rcx +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: cmovnsq %rdi, %rcx ; CHECK-NEXT: andq $-16, %rcx ; CHECK-NEXT: subq %rcx, %rax ; CHECK-NEXT: retq @@ -558,10 +556,9 @@ ; CHECK-LABEL: combine_i64_srem_negpow2: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: movq %rdi, %rcx -; CHECK-NEXT: sarq $63, %rcx -; CHECK-NEXT: shrq $56, %rcx -; CHECK-NEXT: addq %rdi, %rcx +; CHECK-NEXT: leaq 255(%rdi), %rcx +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: cmovnsq %rdi, %rcx ; CHECK-NEXT: andq $-256, %rcx ; CHECK-NEXT: subq %rcx, %rax ; CHECK-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/rem.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/rem.ll +++ llvm/trunk/test/CodeGen/X86/rem.ll @@ -27,10 +27,9 @@ ; CHECK-LABEL: test2: ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: sarl $31, %ecx -; CHECK-NEXT: shrl $24, %ecx -; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: leal 255(%eax), %ecx +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: cmovnsl %eax, %ecx ; CHECK-NEXT: andl $-256, %ecx ; CHECK-NEXT: subl %ecx, %eax ; CHECK-NEXT: retl Index: llvm/trunk/test/CodeGen/X86/srem-seteq.ll =================================================================== --- 
llvm/trunk/test/CodeGen/X86/srem-seteq.ll +++ llvm/trunk/test/CodeGen/X86/srem-seteq.ll @@ -318,10 +318,9 @@ ; X86-LABEL: test_srem_pow2: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: sarl $31, %edx -; X86-NEXT: shrl $28, %edx -; X86-NEXT: addl %ecx, %edx +; X86-NEXT: leal 15(%ecx), %edx +; X86-NEXT: testl %ecx, %ecx +; X86-NEXT: cmovnsl %ecx, %edx ; X86-NEXT: andl $-16, %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx @@ -330,10 +329,10 @@ ; ; X64-LABEL: test_srem_pow2: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: sarl $31, %ecx -; X64-NEXT: shrl $28, %ecx -; X64-NEXT: addl %edi, %ecx +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal 15(%rdi), %ecx +; X64-NEXT: testl %edi, %edi +; X64-NEXT: cmovnsl %edi, %ecx ; X64-NEXT: andl $-16, %ecx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpl %ecx, %edi @@ -350,10 +349,9 @@ ; X86-LABEL: test_srem_int_min: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: sarl $31, %edx -; X86-NEXT: shrl %edx -; X86-NEXT: addl %ecx, %edx +; X86-NEXT: leal 2147483647(%ecx), %edx +; X86-NEXT: testl %ecx, %ecx +; X86-NEXT: cmovnsl %ecx, %edx ; X86-NEXT: andl $-2147483648, %edx # imm = 0x80000000 ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: addl %ecx, %edx @@ -362,10 +360,10 @@ ; ; X64-LABEL: test_srem_int_min: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: sarl $31, %ecx -; X64-NEXT: shrl %ecx -; X64-NEXT: addl %edi, %ecx +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal 2147483647(%rdi), %ecx +; X64-NEXT: testl %edi, %edi +; X64-NEXT: cmovnsl %edi, %ecx ; X64-NEXT: andl $-2147483648, %ecx # imm = 0x80000000 ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: addl %edi, %ecx