diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2875,6 +2875,93 @@ return SDValue(); } +/** + * If we are facing some sort of diamond carry propapagtion pattern try to + * break it up to generate something like: + * (addcarry X, 0, (addcarry A, B, Z):Carry) + * + * The end result is usually an increase in operation required, but because the + * carry is now linearized, other tranforms can kick in and optimize the DAG. + * + * Patterns typically look something like + * (uaddo A, B) + * / \ + * Carry Sum + * | \ + * | (addcarry *, 0, Z) + * | / + * \ Carry + * | / + * (addcarry X, *, *) + * + * But numerous variation exist. Our goal is to identify A, B, X and Z and + * produce a combine with a single path for carry propagation. + */ +static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, + SDValue X, SDValue Carry0, SDValue Carry1, + SDNode *N) { + if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1) + return SDValue(); + if (Carry1.getOpcode() != ISD::UADDO) + return SDValue(); + + SDValue Z; + + /** + * First look for a suitable Z. It will present itself in the form of + * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true + */ + if (Carry0.getOpcode() == ISD::ADDCARRY && + isNullConstant(Carry0.getOperand(1))) { + Z = Carry0.getOperand(2); + } else if (Carry0.getOpcode() == ISD::UADDO && + isOneConstant(Carry0.getOperand(1))) { + EVT VT = Combiner.getSetCCResultType(Carry0.getValueType()); + Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT); + } else { + // We couldn't find a suitable Z. + return SDValue(); + } + + + auto cancelDiamond = [&](SDValue A,SDValue B) { + SDLoc DL(N); + SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z); + Combiner.AddToWorklist(NewY.getNode()); + return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X, + DAG.getConstant(0, DL, X.getValueType()), + NewY.getValue(1)); + }; + + /** + * (uaddo A, B) + * | + * Sum + * | + * (addcarry *, 0, Z) + */ + if (Carry0.getOperand(0) == Carry1.getValue(0)) { + return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1)); + } + + /** + * (addcarry A, 0, Z) + * | + * Sum + * | + * (uaddo *, B) + */ + if (Carry1.getOperand(0) == Carry0.getValue(0)) { + return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1)); + } + + if (Carry1.getOperand(1) == Carry0.getValue(0)) { + return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0)); + } + + return SDValue(); +} + SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N) { // Iff the flag result is dead: @@ -2889,35 +2976,13 @@ * When one of the addcarry argument is itself a carry, we may be facing * a diamond carry propagation. In which case we try to transform the DAG * to ensure linear carry propagation if that is possible. - * - * We are trying to get: - * (addcarry X, 0, (addcarry A, B, Z):Carry) */ if (auto Y = getAsCarry(TLI, N1)) { - /** - * (uaddo A, B) - * / \ - * Carry Sum - * | \ - * | (addcarry *, 0, Z) - * | / - * \ Carry - * | / - * (addcarry X, *, *) - */ - if (Y.getOpcode() == ISD::UADDO && - CarryIn.getResNo() == 1 && - CarryIn.getOpcode() == ISD::ADDCARRY && - isNullConstant(CarryIn.getOperand(1)) && - CarryIn.getOperand(0) == Y.getValue(0)) { - auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(), - Y.getOperand(0), Y.getOperand(1), - CarryIn.getOperand(2)); - AddToWorklist(NewY.getNode()); - return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, - DAG.getConstant(0, SDLoc(N), N0.getValueType()), - NewY.getValue(1)); - } + // Because both are carries, Y and Z can be swapped. + if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N)) + return R; + if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N)) + return R; } return SDValue(); diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll --- a/llvm/test/CodeGen/X86/addcarry.ll +++ b/llvm/test/CodeGen/X86/addcarry.ll @@ -390,13 +390,13 @@ define i128 @addcarry_to_subcarry(i64 %a, i64 %b) { ; CHECK-LABEL: addcarry_to_subcarry: ; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: notq %rsi -; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: movb $1, %cl +; CHECK-NEXT: addb $-1, %cl ; CHECK-NEXT: movq %rdi, %rcx -; CHECK-NEXT: addq %rsi, %rcx -; CHECK-NEXT: setb %al -; CHECK-NEXT: addq $1, %rcx -; CHECK-NEXT: adcq %rdi, %rax +; CHECK-NEXT: adcq %rsi, %rcx +; CHECK-NEXT: adcq $0, %rax ; CHECK-NEXT: setb %cl ; CHECK-NEXT: movzbl %cl, %edx ; CHECK-NEXT: addq %rsi, %rax diff --git a/llvm/test/CodeGen/X86/subcarry.ll b/llvm/test/CodeGen/X86/subcarry.ll --- a/llvm/test/CodeGen/X86/subcarry.ll +++ b/llvm/test/CodeGen/X86/subcarry.ll @@ -90,29 +90,37 @@ define %S @sub(%S* nocapture readonly %this, %S %arg.b) local_unnamed_addr { ; CHECK-LABEL: sub: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq (%rsi), %r10 +; CHECK-NEXT: movq 8(%rsi), %rdi +; CHECK-NEXT: movq %r10, %r11 +; CHECK-NEXT: subq %rdx, %r11 ; CHECK-NEXT: notq %rdx -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: addq (%rsi), %rdx -; CHECK-NEXT: setb %dil -; CHECK-NEXT: addq $1, %rdx -; CHECK-NEXT: adcq 8(%rsi), %rdi -; CHECK-NEXT: setb %r10b -; CHECK-NEXT: movzbl %r10b, %r10d +; CHECK-NEXT: movb $1, %bl +; CHECK-NEXT: addb $-1, %bl +; CHECK-NEXT: adcq %r10, %rdx +; CHECK-NEXT: adcq $0, %rdi +; CHECK-NEXT: setb %dl +; CHECK-NEXT: movzbl %dl, %edx ; CHECK-NEXT: notq %rcx ; CHECK-NEXT: addq %rdi, %rcx -; CHECK-NEXT: adcq 16(%rsi), %r10 -; CHECK-NEXT: setb %dil -; CHECK-NEXT: movzbl %dil, %edi +; CHECK-NEXT: adcq 16(%rsi), %rdx +; CHECK-NEXT: setb %bl +; CHECK-NEXT: movzbl %bl, %edi ; CHECK-NEXT: notq %r8 -; CHECK-NEXT: addq %r10, %r8 +; CHECK-NEXT: addq %rdx, %r8 ; CHECK-NEXT: adcq 24(%rsi), %rdi ; CHECK-NEXT: notq %r9 ; CHECK-NEXT: addq %rdi, %r9 -; CHECK-NEXT: movq %rdx, (%rax) +; CHECK-NEXT: movq %r11, (%rax) ; CHECK-NEXT: movq %rcx, 8(%rax) ; CHECK-NEXT: movq %r8, 16(%rax) ; CHECK-NEXT: movq %r9, 24(%rax) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq entry: %0 = extractvalue %S %arg.b, 0