Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2055,6 +2055,11 @@
       DAG.haveNoCommonBitsSet(N0, N1))
     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
 
+  // fold (add (xor a, -1), 1) -> (sub 0, a)
+  if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1))
+    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+                       N0.getOperand(0));
+
   if (SDValue Combined = visitADDLike(N0, N1, N))
     return Combined;
 
@@ -2190,6 +2195,40 @@
   return SDValue();
 }
 
+/// Return \p V XOR-ed with the constant that inverts a boolean of type \p VT
+/// under the target's boolean contents: -1 for zero-or-negative-one booleans,
+/// 1 for zero-or-one and undefined contents.
+static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
+                           SelectionDAG &DAG, const TargetLowering &TLI) {
+  switch (TLI.getBooleanContents(VT)) {
+  case TargetLowering::ZeroOrOneBooleanContent:
+  case TargetLowering::UndefinedBooleanContent:
+    return DAG.getNode(ISD::XOR, DL, VT, V, DAG.getConstant(1, DL, VT));
+  case TargetLowering::ZeroOrNegativeOneBooleanContent:
+    return DAG.getNode(ISD::XOR, DL, VT, V, DAG.getConstant(-1, DL, VT));
+  }
+  llvm_unreachable("Unsupported boolean content");
+}
+
+/// Return true if \p V is an XOR whose second operand is a constant that
+/// inverts a boolean of type \p VT: 1 for zero-or-one booleans, all-ones for
+/// zero-or-negative-one booleans, any odd constant for undefined contents.
+static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
+  if (V.getOpcode() != ISD::XOR) return false;
+  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
+  if (!Const) return false;
+
+  switch (TLI.getBooleanContents(VT)) {
+  case TargetLowering::ZeroOrOneBooleanContent:
+    return Const->isOne();
+  case TargetLowering::ZeroOrNegativeOneBooleanContent:
+    return Const->isAllOnesValue();
+  case TargetLowering::UndefinedBooleanContent:
+    return (Const->getAPIntValue() & 0x01) == 1;
+  }
+  llvm_unreachable("Unsupported boolean content");
+}
+
 SDValue DAGCombiner::visitUADDO(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -2220,6 +2259,15 @@
     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                      DAG.getConstant(0, DL, CarryVT));
 
+  // fold (uaddo (xor a, -1), 1) -> (usubo 0, a) and flip carry.
+  if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) {
+    SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
+                              DAG.getConstant(0, DL, VT),
+                              N0.getOperand(0));
+    return CombineTo(N, Sub,
+                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
+  }
+
   if (SDValue Combined = visitUADDOLike(N0, N1, N))
     return Combined;
 
@@ -2289,10 +2337,11 @@
       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
   }
 
+  EVT CarryVT = CarryIn.getValueType();
+
   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
   if (isNullConstant(N0) && isNullConstant(N1)) {
     EVT VT = N0.getValueType();
-    EVT CarryVT = CarryIn.getValueType();
     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
     AddToWorklist(CarryExt.getNode());
     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
@@ -2300,6 +2349,16 @@
                      DAG.getConstant(0, DL, CarryVT));
   }
 
+  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
+  if (isBitwiseNot(N0) && isNullConstant(N1) &&
+      isBooleanFlip(CarryIn, CarryVT, TLI)) {
+    SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
+                              DAG.getConstant(0, DL, N0.getValueType()),
+                              N0.getOperand(0), CarryIn.getOperand(0));
+    return CombineTo(N, Sub,
+                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
+  }
+
   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
     return Combined;
 
Index: test/CodeGen/X86/add.ll
===================================================================
--- test/CodeGen/X86/add.ll
+++ test/CodeGen/X86/add.ll
@@ -386,23 +386,20 @@
 define i32 @inc_not(i32 %a) {
 ; X32-LABEL: inc_not:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    notl %eax
-; X32-NEXT:    incl %eax
+; X32-NEXT:    xorl %eax, %eax
+; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LINUX-LABEL: inc_not:
 ; X64-LINUX:       # %bb.0:
-; X64-LINUX-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-LINUX-NEXT:    notl %edi
-; X64-LINUX-NEXT:    leal 1(%rdi), %eax
+; X64-LINUX-NEXT:    negl %edi
+; X64-LINUX-NEXT:    movl %edi, %eax
 ; X64-LINUX-NEXT:    retq
 ;
 ; X64-WIN32-LABEL: inc_not:
 ; X64-WIN32:       # %bb.0:
-; X64-WIN32-NEXT:    # kill: def $ecx killed $ecx def $rcx
-; X64-WIN32-NEXT:    notl %ecx
-; X64-WIN32-NEXT:    leal 1(%rcx), %eax
+; X64-WIN32-NEXT:    negl %ecx
+; X64-WIN32-NEXT:    movl %ecx, %eax
 ; X64-WIN32-NEXT:    retq
   %nota = xor i32 %a, -1
   %r = add i32 %nota, 1
@@ -414,27 +411,24 @@
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    notl %edx
-; X32-NEXT:    addl $1, %edx
+; X32-NEXT:    xorl %edx, %edx
+; X32-NEXT:    subl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT:    movl %edx, (%ecx)
-; X32-NEXT:    setb (%eax)
+; X32-NEXT:    setae (%eax)
 ; X32-NEXT:    retl
 ;
 ; X64-LINUX-LABEL: uaddo1_not:
 ; X64-LINUX:       # %bb.0:
-; X64-LINUX-NEXT:    notl %edi
-; X64-LINUX-NEXT:    addl $1, %edi
+; X64-LINUX-NEXT:    negl %edi
 ; X64-LINUX-NEXT:    movl %edi, (%rsi)
-; X64-LINUX-NEXT:    setb (%rdx)
+; X64-LINUX-NEXT:    setae (%rdx)
 ; X64-LINUX-NEXT:    retq
 ;
 ; X64-WIN32-LABEL: uaddo1_not:
 ; X64-WIN32:       # %bb.0:
-; X64-WIN32-NEXT:    notl %ecx
-; X64-WIN32-NEXT:    addl $1, %ecx
+; X64-WIN32-NEXT:    negl %ecx
 ; X64-WIN32-NEXT:    movl %ecx, (%rdx)
-; X64-WIN32-NEXT:    setb (%r8)
+; X64-WIN32-NEXT:    setae (%r8)
 ; X64-WIN32-NEXT:    retq
   %nota = xor i32 %a, -1
   %uaddo = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %nota, i32 1)
Index: test/CodeGen/X86/addcarry.ll
===================================================================
--- test/CodeGen/X86/addcarry.ll
+++ test/CodeGen/X86/addcarry.ll
@@ -321,12 +321,10 @@
 define i128 @addcarry1_not(i128 %n) {
 ; CHECK-LABEL: addcarry1_not:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    notq %rsi
-; CHECK-NEXT:    notq %rdi
-; CHECK-NEXT:    addq $1, %rdi
-; CHECK-NEXT:    adcq $0, %rsi
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    negq %rdi
+; CHECK-NEXT:    sbbq %rsi, %rdx
 ; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    movq %rsi, %rdx
 ; CHECK-NEXT:    retq
   %1 = xor i128 %n, -1
   %2 = add i128 %1, 1
Index: test/CodeGen/X86/subcarry.ll
===================================================================
--- test/CodeGen/X86/subcarry.ll
+++ test/CodeGen/X86/subcarry.ll
@@ -37,22 +37,18 @@
 define %S @negate(%S* nocapture readonly %this) {
 ; CHECK-LABEL: negate:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movq (%rsi), %rax
-; CHECK-NEXT:    movq 8(%rsi), %rcx
-; CHECK-NEXT:    notq %rax
-; CHECK-NEXT:    addq $1, %rax
-; CHECK-NEXT:    notq %rcx
-; CHECK-NEXT:    adcq $0, %rcx
-; CHECK-NEXT:    movq 16(%rsi), %rdx
-; CHECK-NEXT:    notq %rdx
-; CHECK-NEXT:    adcq $0, %rdx
-; CHECK-NEXT:    movq 24(%rsi), %rsi
-; CHECK-NEXT:    notq %rsi
-; CHECK-NEXT:    adcq $0, %rsi
-; CHECK-NEXT:    movq %rax, (%rdi)
-; CHECK-NEXT:    movq %rcx, 8(%rdi)
-; CHECK-NEXT:    movq %rdx, 16(%rdi)
-; CHECK-NEXT:    movq %rsi, 24(%rdi)
+; CHECK-NEXT:    xorl %r8d, %r8d
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    subq (%rsi), %rcx
+; CHECK-NEXT:    movl $0, %edx
+; CHECK-NEXT:    sbbq 8(%rsi), %rdx
+; CHECK-NEXT:    movl $0, %eax
+; CHECK-NEXT:    sbbq 16(%rsi), %rax
+; CHECK-NEXT:    sbbq 24(%rsi), %r8
+; CHECK-NEXT:    movq %rcx, (%rdi)
+; CHECK-NEXT:    movq %rdx, 8(%rdi)
+; CHECK-NEXT:    movq %rax, 16(%rdi)
+; CHECK-NEXT:    movq %r8, 24(%rdi)
 ; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    retq
 entry: