Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1826,6 +1826,13 @@
     }
   }
 
+  // (add X, (adde Y, 0, Carry)) -> (adde X, Y, Carry)
+  if (N1.getOpcode() == ISD::ADDE &&
+      N->isOnlyUserOf(N1.getValue(0).getNode()) &&
+      isNullConstant(N1.getOperand(1)))
+    return DAG.getNode(ISD::ADDE, SDLoc(N), DAG.getVTList(VT, MVT::Glue),
+                       N0, N1->getOperand(0), N1->getOperand(2));
+
   return SDValue();
 }
 
@@ -1867,6 +1874,18 @@
                                  SDLoc(N), MVT::Glue));
   }
 
+  // (addc X, (adde Y, 0, Carry)) -> (adde X, Y, Carry)
+  // if Y + 1 cannot overflow, i.e. at least one bit of Y is known zero.
+  if (N1.getOpcode() == ISD::ADDE &&
+      N->isOnlyUserOf(N1.getValue(0).getNode()) &&
+      isNullConstant(N1.getOperand(1))) {
+    APInt YZero, YOne;
+    DAG.computeKnownBits(N1.getOperand(0), YZero, YOne);
+    if (YZero.getBoolValue())
+      return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), N0,
+                         N1->getOperand(0), N1->getOperand(2));
+  }
+
   return SDValue();
 }
 
@@ -1874,17 +1893,53 @@
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   SDValue CarryIn = N->getOperand(2);
+  auto VTL = N->getVTList();
+  auto DL = SDLoc(N);
 
   // canonicalize constant to RHS
   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   if (N0C && !N1C)
-    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
+    return DAG.getNode(ISD::ADDE, DL, VTL,
                        N1, N0, CarryIn);
 
   // fold (adde x, y, false) -> (addc x, y)
   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
-    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
+    return DAG.getNode(ISD::ADDC, DL, VTL, N0, N1);
+
+  // If the flag result is dead, we can do transforms that invalidate it.
+  if (!N->hasAnyUseOfValue(1)) {
+    // (adde (add|addc X, Y), 0, Carry) -> (adde X, Y, Carry)
+    if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::ADDC) &&
+        isNullConstant(N1))
+      return DAG.getNode(ISD::ADDE, DL, VTL,
+                         N0->getOperand(0), N0->getOperand(1), CarryIn);
+  }
+
+  // This is a convoluted pattern, but it is actually common
+  // when legalizing big int additions.
+  // (adde X, (adde 0, 0, Y:Carry), (adde Y, 0, Z):Carry)
+  //   -> (adde X, 0, (adde A, B, Z):Carry)
+  // With Y = (addc A, B)
+  if (N1.getOpcode() == ISD::ADDE &&
+      isNullConstant(N1.getOperand(0)) &&
+      isNullConstant(N1.getOperand(1)) &&
+      CarryIn.getOpcode() == ISD::ADDE &&
+      isNullConstant(CarryIn.getOperand(1))) {
+    auto Y = CarryIn.getOperand(0);
+    if (Y.getOpcode() == ISD::ADDC &&
+        N1.getOperand(2).getNode() == Y.getNode()) {
+      // Use the value types of the node producing CarryIn: the carry is
+      // MVT::Glue, so nothing ties Y's integer type to N's.
+      auto NewY = DAG.getNode(ISD::ADDE, DL, CarryIn->getVTList(),
+                              Y.getOperand(0), Y.getOperand(1),
+                              CarryIn.getOperand(2));
+      AddToWorklist(NewY.getNode());
+      return DAG.getNode(ISD::ADDE, DL, VTL, N0,
+                         DAG.getConstant(0, DL, N0.getValueType()),
+                         NewY.getValue(1));
+    }
+  }
 
   return SDValue();
 }
Index: test/CodeGen/X86/adde-carry.ll
===================================================================
--- test/CodeGen/X86/adde-carry.ll
+++ test/CodeGen/X86/adde-carry.ll
@@ -28,9 +28,7 @@
 ; CHECK-LABEL: pr31719:
 ; CHECK:       # BB#0: # %entry
 ; CHECK-NEXT:    addq %rdx, %rdi
-; CHECK-NEXT:    sbbq %rax, %rax
-; CHECK-NEXT:    andl $1, %eax
-; CHECK-NEXT:    addq %rsi, %rax
+; CHECK-NEXT:    adcq $0, %rsi
 ; CHECK-NEXT:    sbbq %rax, %rax
 ; CHECK-NEXT:    andl $1, %eax
 ; CHECK-NEXT:    retq
@@ -47,3 +45,28 @@
   %t11 = trunc i128 %t10 to i64
   ret i64 %t11
 }
+
+define void @add256(i256* %a, i256* %b, i256* %out) #0 {
+; CHECK-LABEL: add256:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movq 16(%rdi), %rax
+; CHECK-NEXT:    movq (%rdi), %r8
+; CHECK-NEXT:    movq 8(%rdi), %rcx
+; CHECK-NEXT:    movq 24(%rsi), %r9
+; CHECK-NEXT:    addq (%rsi), %r8
+; CHECK-NEXT:    adcq 8(%rsi), %rcx
+; CHECK-NEXT:    adcq 16(%rsi), %rax
+; CHECK-NEXT:    adcq 24(%rdi), %r9
+; CHECK-NEXT:    movq %rax, 16(%rdx)
+; CHECK-NEXT:    movq %rcx, 8(%rdx)
+; CHECK-NEXT:    movq %r8, (%rdx)
+; CHECK-NEXT:    movq %r9, 24(%rdx)
+; CHECK-NEXT:    retq
+entry:
+  %av = load i256, i256* %a
+  %bv = load i256, i256* %b
+  %r = add i256 %av, %bv
+  store i256 %r, i256* %out
+  ret void
+}
+