Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2535,6 +2535,97 @@ return SDValue(); } +/** + * If we are facing some sort of ddamoni carry propapagtion patternq try to + * break it up to generate something like: + * (addcarry X, 0, (addcarry A, B, Z):Carry) + * + * The end result is usually an increase in operation required, but because the + * carry is now linearized, other tranforms can kick in and optimize the DAG. + * + * Patterns typically look something like + * (uaddo A, B) + * / \ + * Carry Sum + * | \ + * | (addcarry *, 0, Z) + * | / + * \ Carry + * | / + * (addcarry X, *, *) + * + * But numerous variation exist. Our goal is to identify A, B, X and Z and + * produce a combine with a single path for carry propagation. + */ +static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, + SDValue X, SDValue Carry0, SDValue Carry1, + SDNode *N) { + if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1) + return SDValue(); + if (Carry1.getOpcode() != ISD::UADDO) + return SDValue(); + + SDValue A = Carry1.getOperand(0); + SDValue B = Carry1.getOperand(1); + SDValue Y = Carry0.getOperand(0); + + SDValue Z; + bool DoIt = false; + + /** + * First look for a suitable Z. It will present itself in the form of + * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true + */ + if (Carry0.getOpcode() == ISD::ADDCARRY && + isNullConstant(Carry0.getOperand(1))) { + Z = Carry0.getOperand(2); + } else if (Carry0.getOpcode() == ISD::UADDO && + isOneConstant(Carry0.getOperand(1))) { + EVT VT = Combiner.getSetCCResultType(Carry0.getValueType()); + Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT); + } + + // We couldn't find a suitable Z. + if (!Z) return SDValue(); + + /** + * (uaddo A, B) + * | + * Sum + * | + * (addcarry *, 0, Z) + */ + if (Y == Carry1.getValue(0)) { + DoIt = true; + } + + /** + * (addcarry A, 0, Z) + * | + * Sum + * | + * (uaddo *, B) + */ + if (Carry1.getOperand(0) == Carry0.getValue(0)) { + A = Y; + DoIt = true; + } else if (Carry1.getOperand(1) == Carry0.getValue(0)) { + B = Y; + DoIt = true; + } + + if (DoIt) { + SDLoc DL(N); + auto NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z); + Combiner.AddToWorklist(NewY.getNode()); + return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X, + DAG.getConstant(0, DL, X.getValueType()), + NewY.getValue(1)); + } + + return SDValue(); +} + SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N) { // Iff the flag result is dead: @@ -2545,39 +2636,19 @@ return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0.getOperand(0), N0.getOperand(1), CarryIn); + DeepPatternNodes.insert(N); + /** * When one of the addcarry argument is itself a carry, we may be facing * a diamond carry propagation. In which case we try to transform the DAG * to ensure linear carry propagation if that is possible. - * - * We are trying to get: - * (addcarry X, 0, (addcarry A, B, Z):Carry) */ if (auto Y = getAsCarry(TLI, N1)) { - /** - * (uaddo A, B) - * / \ - * Carry Sum - * | \ - * | (addcarry *, 0, Z) - * | / - * \ Carry - * | / - * (addcarry X, *, *) - */ - if (Y.getOpcode() == ISD::UADDO && - CarryIn.getResNo() == 1 && - CarryIn.getOpcode() == ISD::ADDCARRY && - isNullConstant(CarryIn.getOperand(1)) && - CarryIn.getOperand(0) == Y.getValue(0)) { - auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(), - Y.getOperand(0), Y.getOperand(1), - CarryIn.getOperand(2)); - AddToWorklist(NewY.getNode()); - return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, - DAG.getConstant(0, SDLoc(N), N0.getValueType()), - NewY.getValue(1)); - } + // Because both are carries, Y and Z can be swapped. + if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N)) + return R; + if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N)) + return R; } return SDValue(); Index: test/CodeGen/X86/addcarry.ll =================================================================== --- test/CodeGen/X86/addcarry.ll +++ test/CodeGen/X86/addcarry.ll @@ -312,23 +312,31 @@ define %S @readd(%S* nocapture readonly %this, %S %arg.b) { ; CHECK-LABEL: readd: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: addq (%rsi), %rdx -; CHECK-NEXT: movq 8(%rsi), %r11 -; CHECK-NEXT: adcq $0, %r11 -; CHECK-NEXT: setb %r10b -; CHECK-NEXT: movzbl %r10b, %edi -; CHECK-NEXT: addq %rcx, %r11 -; CHECK-NEXT: adcq 16(%rsi), %rdi +; CHECK-NEXT: movq (%rsi), %r10 +; CHECK-NEXT: movq %rdx, %rdi +; CHECK-NEXT: addq %r10, %rdi +; CHECK-NEXT: movq 8(%rsi), %rdi +; CHECK-NEXT: movq 16(%rsi), %r11 +; CHECK-NEXT: movq %rcx, %rbx +; CHECK-NEXT: adcq %rdi, %rbx +; CHECK-NEXT: addq %r10, %rdx +; CHECK-NEXT: adcq %rdi, %rcx ; CHECK-NEXT: setb %cl -; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: addq %r8, %rdi -; CHECK-NEXT: adcq 24(%rsi), %rcx -; CHECK-NEXT: addq %r9, %rcx +; CHECK-NEXT: movq %r8, %rdi +; CHECK-NEXT: adcq %r11, %rdi +; CHECK-NEXT: addb $255, %cl +; CHECK-NEXT: adcq %r11, %r8 +; CHECK-NEXT: adcq 24(%rsi), %r9 ; CHECK-NEXT: movq %rdx, (%rax) -; CHECK-NEXT: movq %r11, 8(%rax) +; CHECK-NEXT: movq %rbx, 8(%rax) ; CHECK-NEXT: movq %rdi, 16(%rax) -; CHECK-NEXT: movq %rcx, 24(%rax) +; CHECK-NEXT: movq %r9, 24(%rax) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq entry: %0 = extractvalue %S %arg.b, 0 Index: test/CodeGen/X86/subcarry.ll =================================================================== --- test/CodeGen/X86/subcarry.ll +++ test/CodeGen/X86/subcarry.ll @@ -90,29 +90,38 @@ define %S @sub(%S* nocapture readonly %this, %S %arg.b) local_unnamed_addr { ; CHECK-LABEL: sub: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: notq %rdx -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: addq (%rsi), %rdx -; CHECK-NEXT: setb %dil -; CHECK-NEXT: addq $1, %rdx -; CHECK-NEXT: adcq 8(%rsi), %rdi -; CHECK-NEXT: setb %r10b -; CHECK-NEXT: movzbl %r10b, %r10d +; CHECK-NEXT: movq (%rsi), %rdi +; CHECK-NEXT: movq 8(%rsi), %r10 +; CHECK-NEXT: movb $1, %r11b +; CHECK-NEXT: addb $-1, %r11b +; CHECK-NEXT: leaq 1(%rdi,%rdx), %r11 +; CHECK-NEXT: adcq %rdx, %rdi ; CHECK-NEXT: notq %rcx -; CHECK-NEXT: addq %rdi, %rcx -; CHECK-NEXT: adcq 16(%rsi), %r10 -; CHECK-NEXT: setb %dil -; CHECK-NEXT: movzbl %dil, %edi +; CHECK-NEXT: setb %dl +; CHECK-NEXT: movq %rcx, %rdi +; CHECK-NEXT: adcq %r10, %rdi +; CHECK-NEXT: movq 16(%rsi), %rbx +; CHECK-NEXT: addb $255, %dl +; CHECK-NEXT: adcq %r10, %rcx ; CHECK-NEXT: notq %r8 -; CHECK-NEXT: addq %r10, %r8 -; CHECK-NEXT: adcq 24(%rsi), %rdi +; CHECK-NEXT: setb %cl +; CHECK-NEXT: movq %r8, %rdx +; CHECK-NEXT: adcq %rbx, %rdx +; CHECK-NEXT: addb $255, %cl +; CHECK-NEXT: adcq %rbx, %r8 ; CHECK-NEXT: notq %r9 -; CHECK-NEXT: addq %rdi, %r9 -; CHECK-NEXT: movq %rdx, (%rax) -; CHECK-NEXT: movq %rcx, 8(%rax) -; CHECK-NEXT: movq %r8, 16(%rax) +; CHECK-NEXT: adcq 24(%rsi), %r9 +; CHECK-NEXT: movq %r11, (%rax) +; CHECK-NEXT: movq %rdi, 8(%rax) +; CHECK-NEXT: movq %rdx, 16(%rax) ; CHECK-NEXT: movq %r9, 24(%rax) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq entry: %0 = extractvalue %S %arg.b, 0