Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -42653,6 +42653,7 @@
   assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
          "Expected X86ISD::ADD or X86ISD::SUB");
 
+  SDLoc DL(N);
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
   MVT VT = LHS.getSimpleValueType();
@@ -42660,21 +42661,20 @@
 
   // If we don't use the flag result, simplify back to a generic ADD/SUB.
   if (!N->hasAnyUseOfValue(1)) {
-    SDLoc DL(N);
     SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
     return DAG.getMergeValues({Res, DAG.getConstant(0, DL, MVT::i32)}, DL);
   }
 
   // Fold any similar generic ADD/SUB opcodes to reuse this node.
   auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) {
-    // TODO: Add SUB(RHS, LHS) -> SUB(0, SUB(LHS, RHS)) negation support, this
-    // currently causes regressions as we don't have broad x86sub combines.
-    if (Negate)
-      return;
     SDValue Ops[] = {N0, N1};
     SDVTList VTs = DAG.getVTList(N->getValueType(0));
-    if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops))
-      DCI.CombineTo(GenericAddSub, SDValue(N, 0));
+    if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) {
+      SDValue Op(N, 0);
+      if (Negate)
+        Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
+      DCI.CombineTo(GenericAddSub, Op);
+    }
   };
   MatchGeneric(LHS, RHS, false);
   MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());
Index: llvm/trunk/test/CodeGen/X86/combine-sbb.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/combine-sbb.ll
+++ llvm/trunk/test/CodeGen/X86/combine-sbb.ll
@@ -199,26 +199,24 @@
 define i32 @PR40483_sub3(i32*, i32) nounwind {
 ; X86-LABEL: PR40483_sub3:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl (%eax), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl (%esi), %edx
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    movl %edx, %edi
-; X86-NEXT:    subl %ecx, %edi
-; X86-NEXT:    movl %edi, (%esi)
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    subl %esi, %ecx
+; X86-NEXT:    subl %esi, %edx
+; X86-NEXT:    movl %edx, (%eax)
 ; X86-NEXT:    jae .LBB5_1
 ; X86-NEXT:  # %bb.2:
 ; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    jmp .LBB5_3
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
 ; X86-NEXT:  .LBB5_1:
-; X86-NEXT:    subl %edx, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    negl %eax
 ; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:  .LBB5_3:
 ; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: PR40483_sub3:
@@ -226,8 +224,8 @@
 ; X64-NEXT:    movl (%rdi), %ecx
 ; X64-NEXT:    movl %ecx, %eax
 ; X64-NEXT:    subl %esi, %eax
-; X64-NEXT:    movl %esi, %edx
-; X64-NEXT:    subl %ecx, %edx
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    negl %edx
 ; X64-NEXT:    orl %eax, %edx
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    subl %esi, %ecx
Index: llvm/trunk/test/CodeGen/X86/jump_sign.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/jump_sign.ll
+++ llvm/trunk/test/CodeGen/X86/jump_sign.ll
@@ -303,18 +303,16 @@
 }
 
 ; PR13475
-; If we have sub a, b and cmp b, a and the result of cmp is used
-; by sbb, we should not optimize cmp away.
+; We don't need an explicit cmp here. A sub/neg combo will do.
+
 define i32 @func_q(i32 %a0, i32 %a1, i32 %a2) {
 ; CHECK-LABEL: func_q:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    subl %eax, %edx
-; CHECK-NEXT:    cmpl %ecx, %eax
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    xorl %edx, %eax
+; CHECK-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    sbbl %ecx, %ecx
+; CHECK-NEXT:    negl %eax
+; CHECK-NEXT:    xorl %ecx, %eax
 ; CHECK-NEXT:    retl
   %t1 = icmp ult i32 %a0, %a1
   %t2 = sub i32 %a1, %a0
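The patch implements the TODO it deletes: when a generic ISD::SUB exists with the operands swapped relative to a flag-producing X86ISD::SUB, the generic node is now rewritten as SUB(0, SUB(LHS, RHS)) so it can reuse that node, which is where the new negl instructions in the updated CHECK lines come from. As a rough illustration only (a hypothetical function, not taken from the patch or its tests, and the exact codegen may differ), IR of roughly the following shape is the kind of input that exercises the new negation path:

; Hypothetical sketch; names are invented for illustration.
define i32 @sub_both_directions(i32 %a, i32 %b) {
  %borrow = icmp ult i32 %a, %b   ; consumes the borrow of a - b
  %d1 = sub i32 %a, %b            ; a - b: value plus flags, one X86ISD::SUB
  %d2 = sub i32 %b, %a            ; b - a: can now fold to 0 - (a - b), i.e. a negl
  %mask = sext i1 %borrow to i32
  %r = xor i32 %mask, %d2
  ret i32 %r
}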