Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -41014,6 +41014,7 @@ assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) && "Expected X86ISD::ADD or X86ISD::SUB"); + SDLoc DL(N); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); MVT VT = LHS.getSimpleValueType(); @@ -41021,21 +41022,20 @@ // If we don't use the flag result, simplify back to a generic ADD/SUB. if (!N->hasAnyUseOfValue(1)) { - SDLoc DL(N); SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS); return DAG.getMergeValues({Res, DAG.getConstant(0, DL, MVT::i32)}, DL); } // Fold any similar generic ADD/SUB opcodes to reuse this node. auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) { - // TODO: Add SUB(RHS, LHS) -> SUB(0, SUB(LHS, RHS)) negation support, this - // currently causes regressions as we don't have broad x86sub combines. - if (Negate) - return; SDValue Ops[] = {N0, N1}; SDVTList VTs = DAG.getVTList(N->getValueType(0)); - if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) - DCI.CombineTo(GenericAddSub, SDValue(N, 0)); + if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) { + SDValue Op(N, 0); + if (Negate) + Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op); + DCI.CombineTo(GenericAddSub, Op); + } }; MatchGeneric(LHS, RHS, false); MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode()); Index: test/CodeGen/X86/combine-sbb.ll =================================================================== --- test/CodeGen/X86/combine-sbb.ll +++ test/CodeGen/X86/combine-sbb.ll @@ -199,26 +199,24 @@ define i32 @PR40483_sub3(i32*, i32) nounwind { ; X86-LABEL: PR40483_sub3: ; X86: # %bb.0: -; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl (%eax), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %edx -; X86-NEXT: movl %edx, %eax -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: movl %edx, %edi -; X86-NEXT: subl %ecx, %edi -; X86-NEXT: movl %edi, (%esi) +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: subl %esi, %ecx +; X86-NEXT: subl %esi, %edx +; X86-NEXT: movl %edx, (%eax) ; X86-NEXT: jae .LBB5_1 ; X86-NEXT: # %bb.2: ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: jmp .LBB5_3 +; X86-NEXT: popl %esi +; X86-NEXT: retl ; X86-NEXT: .LBB5_1: -; X86-NEXT: subl %edx, %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: negl %eax ; X86-NEXT: orl %ecx, %eax -; X86-NEXT: .LBB5_3: ; X86-NEXT: popl %esi -; X86-NEXT: popl %edi ; X86-NEXT: retl ; ; X64-LABEL: PR40483_sub3: @@ -226,8 +224,8 @@ ; X64-NEXT: movl (%rdi), %ecx ; X64-NEXT: movl %ecx, %eax ; X64-NEXT: subl %esi, %eax -; X64-NEXT: movl %esi, %edx -; X64-NEXT: subl %ecx, %edx +; X64-NEXT: movl %eax, %edx +; X64-NEXT: negl %edx ; X64-NEXT: orl %eax, %edx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: subl %esi, %ecx Index: test/CodeGen/X86/jump_sign.ll =================================================================== --- test/CodeGen/X86/jump_sign.ll +++ test/CodeGen/X86/jump_sign.ll @@ -48,11 +48,16 @@ define i32 @func_h(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: func_h: ; CHECK: # %bb.0: +; CHECK-NEXT: pushl %esi ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: subl %ecx, %eax -; CHECK-NEXT: cmovlel %edx, %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl %ecx, %esi +; CHECK-NEXT: subl %edx, %esi +; CHECK-NEXT: negl %esi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpl %edx, %ecx +; CHECK-NEXT: cmovll %esi, %eax +; CHECK-NEXT: popl %esi ; CHECK-NEXT: retl %cmp = icmp slt i32 %b, %a %sub = sub nsw i32 %a, %b @@ -91,11 +96,16 @@ define i32 @func_k(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: func_k: ; CHECK: # %bb.0: +; CHECK-NEXT: pushl %esi ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: subl %ecx, %eax -; CHECK-NEXT: cmovbel %edx, %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl %ecx, %esi +; CHECK-NEXT: subl %edx, %esi +; CHECK-NEXT: negl %esi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpl %edx, %ecx +; CHECK-NEXT: cmovbl %esi, %eax +; CHECK-NEXT: popl %esi ; CHECK-NEXT: retl %cmp = icmp ult i32 %b, %a %sub = sub i32 %a, %b @@ -111,7 +121,9 @@ ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: subl %ecx, %eax -; CHECK-NEXT: cmovlel %edx, %eax +; CHECK-NEXT: negl %eax +; CHECK-NEXT: cmpl %ecx, %edx +; CHECK-NEXT: cmovgel %ecx, %eax ; CHECK-NEXT: retl %cmp = icmp slt i32 %b, %a %sub = sub nsw i32 %a, %b @@ -138,10 +150,11 @@ define i32 @func_l2(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: func_l2: ; CHECK: # %bb.0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: subl %edx, %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: subl %eax, %ecx +; CHECK-NEXT: negl %ecx ; CHECK-NEXT: cmpl %eax, %edx ; CHECK-NEXT: jne .LBB8_2 ; CHECK-NEXT: # %bb.1: # %if.then @@ -167,9 +180,12 @@ ; CHECK-LABEL: func_l3: ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: subl %ecx, %eax -; CHECK-NEXT: jge .LBB9_2 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: subl %edx, %eax +; CHECK-NEXT: negl %eax +; CHECK-NEXT: cmpl %edx, %ecx +; CHECK-NEXT: jle .LBB9_2 ; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: retl ; CHECK-NEXT: .LBB9_2: # %if.else @@ -192,11 +208,16 @@ define i32 @func_l4(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: func_l4: ; CHECK: # %bb.0: +; CHECK-NEXT: pushl %esi ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: subl %ecx, %eax -; CHECK-NEXT: cmovll %edx, %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl %ecx, %esi +; CHECK-NEXT: subl %edx, %esi +; CHECK-NEXT: negl %esi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpl %edx, %ecx +; CHECK-NEXT: cmovlel %esi, %eax +; CHECK-NEXT: popl %esi ; CHECK-NEXT: retl %cmp = icmp sgt i32 %b, %a %sub = sub i32 %a, %b @@ -318,12 +339,10 @@ ; CHECK-LABEL: func_q: ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movl %ecx, %edx -; CHECK-NEXT: subl %eax, %edx -; CHECK-NEXT: cmpl %ecx, %eax -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: xorl %edx, %eax +; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: sbbl %ecx, %ecx +; CHECK-NEXT: negl %eax +; CHECK-NEXT: xorl %ecx, %eax ; CHECK-NEXT: retl %t1 = icmp ult i32 %a0, %a1 %t2 = sub i32 %a1, %a0