Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2161,6 +2161,44 @@ return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0.getOperand(0), N0.getOperand(1), CarryIn); + /** + * Sometimes, after legalization, we get a 'diamond' carry chain. Because + * the carry cannot propagate through both paths at once, it is possible to + * restructure the chain so that it becomes linear. After which, other + * combines can kick in to optimize it. + * + * The pattern: + * (uaddo A, B) + * / \ + * Carry Sum + * / \ + * (addcarry 0, 0, *) (addcarry *, 0, Z) + * \ / + * Sum Carry + * \ / + * (addcarry X, *, *) + * + * is transformed into + * (addcarry X, 0, (addcarry A, B, Z):Carry) + */ + if (N1.getOpcode() == ISD::ADDCARRY && + isNullConstant(N1.getOperand(0)) && + isNullConstant(N1.getOperand(1)) && + CarryIn.getOpcode() == ISD::ADDCARRY && + isNullConstant(CarryIn.getOperand(1))) { + auto Y = CarryIn.getOperand(0); + if (Y.getOpcode() == ISD::UADDO && + N1.getOperand(2).getNode() == Y.getNode()) { + auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(), + Y.getOperand(0), Y.getOperand(1), + CarryIn.getOperand(2)); + AddToWorklist(NewY.getNode()); + return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, + DAG.getConstant(0, SDLoc(N), N0.getValueType()), + NewY.getValue(1)); + } + } + return SDValue(); } Index: test/CodeGen/X86/addcarry.ll =================================================================== --- test/CodeGen/X86/addcarry.ll +++ test/CodeGen/X86/addcarry.ll @@ -86,21 +86,14 @@ define %scalar @pr31719(%scalar* nocapture readonly %this, %scalar %arg.b) { ; CHECK-LABEL: pr31719: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: addq 8(%rsi), %rcx -; CHECK-NEXT: sbbq %r10, %r10 -; CHECK-NEXT: andl $1, %r10d -; CHECK-NEXT: addq 16(%rsi), %r8 -; CHECK-NEXT: sbbq %rax, %rax -; CHECK-NEXT: andl $1, %eax 
-; CHECK-NEXT: addq 24(%rsi), %r9 ; CHECK-NEXT: addq (%rsi), %rdx -; CHECK-NEXT: adcq $0, %rcx -; CHECK-NEXT: adcq %r8, %r10 -; CHECK-NEXT: adcq %r9, %rax +; CHECK-NEXT: adcq 8(%rsi), %rcx +; CHECK-NEXT: adcq 16(%rsi), %r8 +; CHECK-NEXT: adcq 24(%rsi), %r9 ; CHECK-NEXT: movq %rdx, (%rdi) ; CHECK-NEXT: movq %rcx, 8(%rdi) -; CHECK-NEXT: movq %r10, 16(%rdi) -; CHECK-NEXT: movq %rax, 24(%rdi) +; CHECK-NEXT: movq %r8, 16(%rdi) +; CHECK-NEXT: movq %r9, 24(%rdi) ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: retq entry: