diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3538,6 +3538,11 @@
     }
   }
 
+  // See if we have a sub(X,mul(div(X,Y),Y)) == rem(X,Y) pattern that could be
+  // merged into a divrem.
+  if (SDValue DivRem = useDivRem(N))
+    return DivRem.getValue(1);
+
   // If the relocation model supports it, consider symbol offsets.
   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
@@ -4021,15 +4026,39 @@
   if (Node->use_empty())
     return SDValue(); // This is a dead node, leave it alone.
 
-  unsigned Opcode = Node->getOpcode();
-  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
-  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
-
   // DivMod lib calls can still work on non-legal types if using lib-calls.
   EVT VT = Node->getValueType(0);
   if (VT.isVector() || !VT.isInteger())
     return SDValue();
 
+  unsigned Opcode = Node->getOpcode();
+  SDValue Op0 = Node->getOperand(0);
+  SDValue Op1 = Node->getOperand(1);
+
+  // Check for a sub(X,mul(div(X,Y),Y)) == rem(X,Y) pattern.
+  if (Opcode == ISD::SUB) {
+    if (Op1.getOpcode() != ISD::MUL)
+      return SDValue();
+    auto IsDiv = [&](SDValue Mul0, SDValue Mul1) {
+      unsigned Opc = Mul0.getOpcode();
+      if ((Opc != ISD::SDIV && Opc != ISD::UDIV) || Mul0.getOperand(0) != Op0 ||
+          Mul0.getOperand(1) != Mul1)
+        return false;
+      Opcode = Opc == ISD::SDIV ? ISD::SREM : ISD::UREM;
+      Op1 = Mul0.getOperand(1);
+      return true;
+    };
+    if (!IsDiv(Op1.getOperand(0), Op1.getOperand(1)) &&
+        !IsDiv(Op1.getOperand(1), Op1.getOperand(0)))
+      return SDValue();
+  }
+
+  assert((Opcode == ISD::SDIV || Opcode == ISD::SREM || Opcode == ISD::UDIV ||
+          Opcode == ISD::UREM) &&
+         "Unknown div/rem opcode");
+  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
+  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+
   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
     return SDValue();
 
@@ -4051,8 +4080,6 @@
     return SDValue();
   }
 
-  SDValue Op0 = Node->getOperand(0);
-  SDValue Op1 = Node->getOperand(1);
   SDValue combined;
   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
@@ -11,25 +11,20 @@
 define i8 @scalar_i8(i8 %x, i8 %y, i8* %divdst) nounwind {
 ; X86-LABEL: scalar_i8:
 ; X86:       # %bb.0:
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    idivb {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT:    movsbl %cl, %eax
-; X86-NEXT:    idivb %ch
+; X86-NEXT:    movsbl %ah, %ecx
 ; X86-NEXT:    movb %al, (%edx)
-; X86-NEXT:    mulb %ch
-; X86-NEXT:    subb %al, %cl
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: scalar_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movsbl %dil, %ecx
-; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    movsbl %dil, %eax
 ; X64-NEXT:    idivb %sil
+; X64-NEXT:    movsbl %ah, %ecx
 ; X64-NEXT:    movb %al, (%rdx)
-; X64-NEXT:    mulb %sil
-; X64-NEXT:    subb %al, %cl
 ; X64-NEXT:    movl %ecx, %eax
 ; X64-NEXT:    retq
   %div = sdiv i8 %x, %y
@@ -42,34 +37,23 @@
 define i16 @scalar_i16(i16 %x, i16 %y, i16* %divdst) nounwind {
 ; X86-LABEL: scalar_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    cwtd
-; X86-NEXT:    idivw %si
-; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movw %ax, (%edi)
-; X86-NEXT:    imull %eax, %esi
-; X86-NEXT:    subl %esi, %ecx
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
+; X86-NEXT:    idivw {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movw %ax, (%ecx)
+; X86-NEXT:    movl %edx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: scalar_i16:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    cwtd
 ; X64-NEXT:    idivw %si
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
 ; X64-NEXT:    movw %ax, (%rcx)
-; X64-NEXT:    imull %eax, %esi
-; X64-NEXT:    subl %esi, %edi
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %edx, %eax
 ; X64-NEXT:    retq
   %div = sdiv i16 %x, %y
   store i16 %div, i16* %divdst, align 4
@@ -81,20 +65,12 @@
 define i32 @scalar_i32(i32 %x, i32 %y, i32* %divdst) nounwind {
 ; X86-LABEL: scalar_i32:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    cltd
-; X86-NEXT:    idivl %edi
-; X86-NEXT:    movl %eax, (%esi)
-; X86-NEXT:    imull %edi, %eax
-; X86-NEXT:    subl %eax, %ecx
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
+; X86-NEXT:    idivl {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %eax, (%ecx)
+; X86-NEXT:    movl %edx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: scalar_i32:
@@ -104,9 +80,7 @@
 ; X64-NEXT:    cltd
 ; X64-NEXT:    idivl %esi
 ; X64-NEXT:    movl %eax, (%rcx)
-; X64-NEXT:    imull %esi, %eax
-; X64-NEXT:    subl %eax, %edi
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %edx, %eax
 ; X64-NEXT:    retq
   %div = sdiv i32 %x, %y
   store i32 %div, i32* %divdst, align 4
@@ -158,9 +132,7 @@
 ; X64-NEXT:    cqto
 ; X64-NEXT:    idivq %rsi
 ; X64-NEXT:    movq %rax, (%rcx)
-; X64-NEXT:    imulq %rsi, %rax
-; X64-NEXT:    subq %rax, %rdi
-; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdx, %rax
 ; X64-NEXT:    retq
   %div = sdiv i64 %x, %y
   store i64 %div, i64* %divdst, align 4
@@ -880,34 +852,23 @@
 define i32 @scalar_i32_commutative(i32 %x, i32* %ysrc, i32* %divdst) nounwind {
 ; X86-LABEL: scalar_i32_commutative:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl (%eax), %edi
-; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    cltd
-; X86-NEXT:    idivl %edi
-; X86-NEXT:    movl %eax, (%esi)
-; X86-NEXT:    imull %eax, %edi
-; X86-NEXT:    subl %edi, %ecx
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
+; X86-NEXT:    idivl (%ecx)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %eax, (%ecx)
+; X86-NEXT:    movl %edx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: scalar_i32_commutative:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movl (%rsi), %esi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    cltd
-; X64-NEXT:    idivl %esi
+; X64-NEXT:    idivl (%rsi)
 ; X64-NEXT:    movl %eax, (%rcx)
-; X64-NEXT:    imull %eax, %esi
-; X64-NEXT:    subl %esi, %edi
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %edx, %eax
 ; X64-NEXT:    retq
   %y = load i32, i32* %ysrc, align 4
   %div = sdiv i32 %x, %y
@@ -921,24 +882,20 @@
 define i32 @extrause(i32 %x, i32 %y, i32* %divdst, i32* %t1dst) nounwind {
 ; X86-LABEL: extrause:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    cltd
-; X86-NEXT:    idivl %ebx
+; X86-NEXT:    idivl %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl %eax, (%edi)
-; X86-NEXT:    imull %ebx, %eax
+; X86-NEXT:    imull %ecx, %eax
 ; X86-NEXT:    movl %eax, (%esi)
-; X86-NEXT:    subl %eax, %ecx
-; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl %edx, %eax
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
-; X86-NEXT:    popl %ebx
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: extrause:
@@ -950,8 +907,7 @@
 ; X64-NEXT:    movl %eax, (%r8)
 ; X64-NEXT:    imull %esi, %eax
 ; X64-NEXT:    movl %eax, (%rcx)
-; X64-NEXT:    subl %eax, %edi
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %edx, %eax
 ; X64-NEXT:    retq
   %div = sdiv i32 %x, %y
   store i32 %div, i32* %divdst, align 4
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -11,25 +11,20 @@
 define i8 @scalar_i8(i8 %x, i8 %y, i8* %divdst) nounwind {
 ; X86-LABEL: scalar_i8:
 ; X86:       # %bb.0:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    divb {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT:    movzbl %cl, %eax
-; X86-NEXT:    divb %ch
+; X86-NEXT:    movzbl %ah, %ecx
 ; X86-NEXT:    movb %al, (%edx)
-; X86-NEXT:    mulb %ch
-; X86-NEXT:    subb %al, %cl
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: scalar_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzbl %dil, %ecx
-; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    movzbl %dil, %eax
 ; X64-NEXT:    divb %sil
+; X64-NEXT:    movzbl %ah, %ecx
 ; X64-NEXT:    movb %al, (%rdx)
-; X64-NEXT:    mulb %sil
-; X64-NEXT:    subb %al, %cl
 ; X64-NEXT:    movl %ecx, %eax
 ; X64-NEXT:    retq
   %div = udiv i8 %x, %y
@@ -42,34 +37,23 @@
 define i16 @scalar_i16(i16 %x, i16 %y, i16* %divdst) nounwind {
 ; X86-LABEL: scalar_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
-; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movw %ax, (%edi)
-; X86-NEXT:    imull %eax, %esi
-; X86-NEXT:    subl %esi, %ecx
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
+; X86-NEXT:    divw {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movw %ax, (%ecx)
+; X86-NEXT:    movl %edx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: scalar_i16:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divw %si
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
 ; X64-NEXT:    movw %ax, (%rcx)
-; X64-NEXT:    imull %eax, %esi
-; X64-NEXT:    subl %esi, %edi
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %edx, %eax
 ; X64-NEXT:    retq
   %div = udiv i16 %x, %y
   store i16 %div, i16* %divdst, align 4
@@ -81,20 +65,12 @@
 define i32 @scalar_i32(i32 %x, i32 %y, i32* %divdst) nounwind {
 ; X86-LABEL: scalar_i32:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divl %edi
-; X86-NEXT:    movl %eax, (%esi)
-; X86-NEXT:    imull %edi, %eax
-; X86-NEXT:    subl %eax, %ecx
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
+; X86-NEXT:    divl {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %eax, (%ecx)
+; X86-NEXT:    movl %edx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: scalar_i32:
@@ -104,9 +80,7 @@
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divl %esi
 ; X64-NEXT:    movl %eax, (%rcx)
-; X64-NEXT:    imull %esi, %eax
-; X64-NEXT:    subl %eax, %edi
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %edx, %eax
 ; X64-NEXT:    retq
   %div = udiv i32 %x, %y
   store i32 %div, i32* %divdst, align 4
@@ -158,9 +132,7 @@
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divq %rsi
 ; X64-NEXT:    movq %rax, (%rcx)
-; X64-NEXT:    imulq %rsi, %rax
-; X64-NEXT:    subq %rax, %rdi
-; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdx, %rax
 ; X64-NEXT:    retq
   %div = udiv i64 %x, %y
   store i64 %div, i64* %divdst, align 4
@@ -880,34 +852,23 @@
 define i32 @scalar_i32_commutative(i32 %x, i32* %ysrc, i32* %divdst) nounwind {
 ; X86-LABEL: scalar_i32_commutative:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl (%eax), %edi
-; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divl %edi
-; X86-NEXT:    movl %eax, (%esi)
-; X86-NEXT:    imull %eax, %edi
-; X86-NEXT:    subl %edi, %ecx
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
+; X86-NEXT:    divl (%ecx)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %eax, (%ecx)
+; X86-NEXT:    movl %edx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: scalar_i32_commutative:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movl (%rsi), %esi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divl %esi
+; X64-NEXT:    divl (%rsi)
 ; X64-NEXT:    movl %eax, (%rcx)
-; X64-NEXT:    imull %eax, %esi
-; X64-NEXT:    subl %esi, %edi
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %edx, %eax
 ; X64-NEXT:    retq
   %y = load i32, i32* %ysrc, align 4
   %div = udiv i32 %x, %y
@@ -921,24 +882,20 @@
 define i32 @extrause(i32 %x, i32 %y, i32* %divdst, i32* %t1dst) nounwind {
 ; X86-LABEL: extrause:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divl %ebx
+; X86-NEXT:    divl %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl %eax, (%edi)
-; X86-NEXT:    imull %ebx, %eax
+; X86-NEXT:    imull %ecx, %eax
 ; X86-NEXT:    movl %eax, (%esi)
-; X86-NEXT:    subl %eax, %ecx
-; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl %edx, %eax
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
-; X86-NEXT:    popl %ebx
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: extrause:
@@ -950,8 +907,7 @@
 ; X64-NEXT:    movl %eax, (%r8)
 ; X64-NEXT:    imull %esi, %eax
 ; X64-NEXT:    movl %eax, (%rcx)
-; X64-NEXT:    subl %eax, %edi
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %edx, %eax
 ; X64-NEXT:    retq
   %div = udiv i32 %x, %y
   store i32 %div, i32* %divdst, align 4
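
For readers skimming the checks: the shape the new combine matches is easiest to see in IR. Below is a minimal sketch (hypothetical function name, not part of the patch; the body mirrors the scalar_i32 test above) in which the remainder is recomposed by hand as sub(X, mul(div(X,Y), Y)) instead of using srem. With this change, useDivRem folds the sub together with the live sdiv into a single ISD::SDIVREM, so x86 executes one idivl and returns the remainder straight from %edx; the unsigned case is analogous.

; Hypothetical reproducer: the remainder is spelled out as X - (X/Y)*Y
; rather than with srem, which is exactly the sub(X, mul(div(X,Y), Y))
; pattern that useDivRem now recognizes.
define i32 @rem_recomposed(i32 %x, i32 %y, i32* %divdst) nounwind {
  %div = sdiv i32 %x, %y                  ; quotient, kept alive by the store
  store i32 %div, i32* %divdst, align 4
  %t0 = mul i32 %div, %y                  ; (X/Y) * Y
  %rem = sub i32 %x, %t0                  ; X - (X/Y)*Y == X srem Y
  ret i32 %rem                            ; now taken from DIVREM's remainder
}

As the extrause tests show, the fold also applies when the mul has another use: the imull is kept for its store to %t1dst, but the final sub is still replaced by the remainder read from %edx.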