Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -40972,20 +40972,38 @@
   return Op.getValue(1);
 }
 
-static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
+                                TargetLowering::DAGCombinerInfo &DCI) {
   assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
          "Expected X86ISD::ADD or X86ISD::SUB");
 
-  // If we don't use the flag result, simplify back to a simple ADD/SUB.
-  if (N->hasAnyUseOfValue(1))
-    return SDValue();
-
-  SDLoc DL(N);
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
-  SDValue Res = DAG.getNode(X86ISD::ADD == N->getOpcode() ? ISD::ADD : ISD::SUB,
-                            DL, LHS.getSimpleValueType(), LHS, RHS);
-  return DAG.getMergeValues({Res, DAG.getConstant(0, DL, MVT::i32)}, DL);
+  MVT VT = LHS.getSimpleValueType();
+  unsigned GenericOpc = X86ISD::ADD == N->getOpcode() ? ISD::ADD : ISD::SUB;
+
+  // If we don't use the flag result, simplify back to a generic ADD/SUB.
+  if (!N->hasAnyUseOfValue(1)) {
+    SDLoc DL(N);
+    SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
+    return DAG.getMergeValues({Res, DAG.getConstant(0, DL, MVT::i32)}, DL);
+  }
+
+  // Fold any similar generic ADD/SUB opcodes to reuse this node.
+  auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) {
+    // TODO: Add SUB(RHS, LHS) -> SUB(0, SUB(LHS, RHS)) negation support, this
+    // currently causes regressions as we don't have broad x86sub combines.
+    if (Negate)
+      return;
+    SDValue Ops[] = {N0, N1};
+    SDVTList VTs = DAG.getVTList(N->getValueType(0));
+    if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops))
+      DCI.CombineTo(GenericAddSub, SDValue(N, 0));
+  };
+  MatchGeneric(LHS, RHS, false);
+  MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());
+
+  return SDValue();
 }
 
 static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
@@ -42198,7 +42216,7 @@
   case ISD::ADD: return combineAdd(N, DAG, Subtarget);
   case ISD::SUB: return combineSub(N, DAG, Subtarget);
   case X86ISD::ADD:
-  case X86ISD::SUB: return combineX86AddSub(N, DAG);
+  case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI);
   case X86ISD::SBB: return combineSBB(N, DAG);
   case X86ISD::ADC: return combineADC(N, DAG, DCI);
   case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget);
Index: llvm/trunk/test/CodeGen/X86/combine-adc.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/combine-adc.ll
+++ llvm/trunk/test/CodeGen/X86/combine-adc.ll
@@ -5,33 +5,22 @@
 define i32 @PR40483_add1(i32*, i32) nounwind {
 ; X86-LABEL: PR40483_add1:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl (%edx), %esi
-; X86-NEXT:    leal (%esi,%ecx), %eax
-; X86-NEXT:    addl %ecx, %esi
-; X86-NEXT:    movl %esi, (%edx)
-; X86-NEXT:    jae .LBB0_1
-; X86-NEXT:  # %bb.2:
+; X86-NEXT:    movl (%ecx), %eax
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, (%ecx)
+; X86-NEXT:    jae .LBB0_2
+; X86-NEXT:  # %bb.1:
 ; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    retl
-; X86-NEXT:  .LBB0_1:
-; X86-NEXT:    orl %eax, %eax
-; X86-NEXT:    popl %esi
+; X86-NEXT:  .LBB0_2:
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: PR40483_add1:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    movl (%rdi), %ecx
-; X64-NEXT:    leal (%rcx,%rsi), %edx
-; X64-NEXT:    orl %edx, %edx
 ; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    addl %esi, %ecx
-; X64-NEXT:    movl %ecx, (%rdi)
-; X64-NEXT:    cmovael %edx, %eax
+; X64-NEXT:    addl (%rdi), %esi
+; X64-NEXT:    movl %esi, (%rdi)
+; X64-NEXT:    cmovael %esi, %eax
 ; X64-NEXT:    retq
   %3 = load i32, i32* %0, align 8
   %4 = tail call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %3, i32 %1)
@@ -48,34 +37,23 @@
 define i32 @PR40483_add2(i32*, i32) nounwind {
 ; X86-LABEL: PR40483_add2:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl (%esi), %edi
-; X86-NEXT:    leal (%edi,%edx), %ecx
+; X86-NEXT:    movl (%edx), %ecx
 ; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    addl %edx, %edi
-; X86-NEXT:    movl %edi, (%esi)
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, (%edx)
 ; X86-NEXT:    jae .LBB1_2
 ; X86-NEXT:  # %bb.1:
-; X86-NEXT:    orl %ecx, %ecx
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:  .LBB1_2:
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: PR40483_add2:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    movl (%rdi), %ecx
-; X64-NEXT:    leal (%rcx,%rsi), %edx
-; X64-NEXT:    orl %edx, %edx
 ; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    addl %esi, %ecx
-; X64-NEXT:    movl %ecx, (%rdi)
-; X64-NEXT:    cmovbl %edx, %eax
+; X64-NEXT:    addl (%rdi), %esi
+; X64-NEXT:    movl %esi, (%rdi)
+; X64-NEXT:    cmovbl %esi, %eax
 ; X64-NEXT:    retq
   %3 = load i32, i32* %0, align 8
   %4 = tail call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %3, i32 %1)
Index: llvm/trunk/test/CodeGen/X86/combine-sbb.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/combine-sbb.ll
+++ llvm/trunk/test/CodeGen/X86/combine-sbb.ll
@@ -11,11 +11,11 @@
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl (%ecx), %esi
-; X86-NEXT:    movl 4(%ecx), %ecx
-; X86-NEXT:    subl (%edx), %esi
-; X86-NEXT:    sbbl 4(%edx), %ecx
-; X86-NEXT:    movl %ecx, 4(%eax)
+; X86-NEXT:    movl (%edx), %esi
+; X86-NEXT:    movl 4(%edx), %edx
+; X86-NEXT:    subl (%ecx), %esi
+; X86-NEXT:    sbbl 4(%ecx), %edx
+; X86-NEXT:    movl %edx, 4(%eax)
 ; X86-NEXT:    movl %esi, (%eax)
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl $4
@@ -249,35 +249,24 @@
 define i32 @PR40483_sub4(i32*, i32) nounwind {
 ; X86-LABEL: PR40483_sub4:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl (%esi), %edi
-; X86-NEXT:    movl %edi, %ecx
-; X86-NEXT:    subl %edx, %ecx
+; X86-NEXT:    movl (%edx), %ecx
 ; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    subl %edx, %edi
-; X86-NEXT:    movl %edi, (%esi)
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, (%edx)
 ; X86-NEXT:    jae .LBB6_2
 ; X86-NEXT:  # %bb.1:
-; X86-NEXT:    orl %ecx, %ecx
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:  .LBB6_2:
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: PR40483_sub4:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl (%rdi), %ecx
-; X64-NEXT:    movl %ecx, %edx
-; X64-NEXT:    subl %esi, %edx
-; X64-NEXT:    orl %edx, %edx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    subl %esi, %ecx
-; X64-NEXT:    movl %ecx, (%rdi)
-; X64-NEXT:    cmovbl %edx, %eax
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    movl %eax, (%rdi)
+; X64-NEXT:    cmovael %ecx, %eax
 ; X64-NEXT:    retq
   %3 = load i32, i32* %0, align 8
   %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)