Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -625,7 +625,8 @@
     SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -802,6 +802,9 @@
   setOperationAction(ISD::SSUBO, MVT::i32, Custom);
   setOperationAction(ISD::USUBO, MVT::i32, Custom);
 
+  setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
+  setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
+
   // i64 operation support.
   setOperationAction(ISD::MUL,     MVT::i64, Expand);
   setOperationAction(ISD::MULHU,   MVT::i32, Expand);
@@ -3953,7 +3956,7 @@
 }
 
 SDValue
-ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
+ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
   // Let legalize expand this if it isn't a legal type yet.
   if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
     return SDValue();
@@ -3975,6 +3978,72 @@
   return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
 }
 
+/// Move the boolean carry \p BoolCarry into the carry flag. The flag is result
+/// #1 of the returned ARMISD::ADDC node.
+static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
+                                              SelectionDAG &DAG) {
+  SDLoc DL(BoolCarry);
+  EVT CarryVT = BoolCarry.getValueType();
+
+  APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
+  // This converts the boolean value carry into the carry flag by doing
+  // ARMISD::ADDC Carry, ~0
+  return DAG.getNode(ARMISD::ADDC, DL, DAG.getVTList(CarryVT, MVT::i32),
+                     BoolCarry, DAG.getConstant(NegOne, DL, CarryVT));
+}
+
+/// Turn the carry flag \p Flags back into a boolean value of type \p VT; the
+/// value is result #0 of the returned ARMISD::ADDE node.
+static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
+                                              SelectionDAG &DAG) {
+  SDLoc DL(Flags);
+
+  // Now convert the carry flag into a boolean carry. We do this
+  // using ARMISD::ADDE 0, 0, Carry
+  return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
+                     DAG.getConstant(0, DL, MVT::i32),
+                     DAG.getConstant(0, DL, MVT::i32), Flags);
+}
+
+/// Lower ISD::UADDO/USUBO to ARMISD::ADDC/SUBC, deriving the overflow result
+/// from the carry flag those nodes produce.
+SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  // Let legalize expand this if it isn't a legal type yet.
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
+    return SDValue();
+
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDLoc dl(Op);
+
+  EVT VT = Op.getValueType();
+  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+  SDValue Value;
+  SDValue Overflow;
+  switch (Op.getOpcode()) {
+  default:
+    llvm_unreachable("Unknown overflow instruction!");
+  case ISD::UADDO:
+    Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
+    // Convert the carry flag into a boolean value.
+    Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
+    break;
+  case ISD::USUBO: {
+    Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
+    // Convert the carry flag into a boolean value.
+    Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
+    // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
+    // value. So compute 1 - C.
+    Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
+                           DAG.getConstant(1, dl, MVT::i32), Overflow);
+    break;
+  }
+  }
+
+  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
+}
+
 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   SDValue Cond = Op.getOperand(0);
   SDValue SelectTrue = Op.getOperand(1);
@@ -7380,6 +7449,53 @@
                      Op.getOperand(1), Op.getOperand(2));
 }
 
+/// Lower ISD::ADDCARRY/SUBCARRY, whose carry operand and result are boolean
+/// values, onto ARMISD::ADDE/SUBE, which consume and produce the carry flag.
+/// SUBCARRY works with a borrow where SUBE uses a carry, hence the 1 - C.
+static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
+  SDNode *N = Op.getNode();
+  EVT VT = N->getValueType(0);
+  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+  SDValue Carry = Op.getOperand(2);
+
+  SDLoc DL(Op);
+
+  SDValue Result;
+  if (Op.getOpcode() == ISD::ADDCARRY) {
+    // This converts the boolean value carry into the carry flag.
+    Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
+
+    // Do the addition proper using the carry flag we wanted.
+    Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
+                         Op.getOperand(1), Carry.getValue(1));
+
+    // Now convert the carry flag into a boolean value.
+    Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
+  } else {
+    // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we
+    // have to invert the carry first.
+    Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
+                        DAG.getConstant(1, DL, MVT::i32), Carry);
+    // This converts the boolean value carry into the carry flag.
+    Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
+
+    // Do the subtraction proper using the carry flag we wanted.
+    Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
+                         Op.getOperand(1), Carry.getValue(1));
+
+    // Now convert the carry flag into a boolean value.
+    Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
+    // But the carry returned by ARMISD::SUBE is not a borrow as expected
+    // by ISD::SUBCARRY, so compute 1 - C.
+    Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
+                        DAG.getConstant(1, DL, MVT::i32), Carry);
+  }
+
+  // Return both values.
+  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
+}
+
 SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
   assert(Subtarget->isTargetDarwin());
 
@@ -7734,11 +7850,14 @@
   case ISD::ADDE:
   case ISD::SUBC:
   case ISD::SUBE:          return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+  case ISD::ADDCARRY:
+  case ISD::SUBCARRY:      return LowerADDSUBCARRY(Op, DAG);
   case ISD::SADDO:
-  case ISD::UADDO:
   case ISD::SSUBO:
+    return LowerSignedALUO(Op, DAG);
+  case ISD::UADDO:
   case ISD::USUBO:
-    return LowerXALUO(Op, DAG);
+    return LowerUnsignedALUO(Op, DAG);
   case ISD::ATOMIC_LOAD:
   case ISD::ATOMIC_STORE:  return LowerAtomicLoadStore(Op, DAG);
   case ISD::FSINCOS:       return LowerFSINCOS(Op, DAG);
@@ -9687,11 +9806,11 @@
   // a S/UMLAL instruction.
   //                  UMUL_LOHI
   //                 / :lo    \ :hi
-  //                /          \          [no multiline comment]
-  //    loAdd ->  ADDE         |
-  //                 \ :glue  /
-  //                  \      /
-  //                    ADDC   <- hiAdd
+  //                V          \          [no multiline comment]
+  //    loAdd ->  ADDC         |
+  //                 \ :carry /
+  //                  V      V
+  //                    ADDE   <- hiAdd
   //
   assert(AddeNode->getOpcode() == ARMISD::ADDE && "Expect an ADDE");
 
@@ -9699,7 +9818,7 @@
          AddeNode->getOperand(2).getValueType() == MVT::i32 &&
          "ADDE node has the wrong inputs");
 
-  // Check that we have a glued ADDC node.
+  // Check that we are chained to the right ADDC node.
   SDNode* AddcNode = AddeNode->getOperand(2).getNode();
   if (AddcNode->getOpcode() != ARMISD::ADDC)
     return SDValue();
@@ -9750,7 +9869,7 @@
   SDValue* LoMul = nullptr;
   SDValue* LowAdd = nullptr;
 
-  // Ensure that ADDE is from high result of ISD::SMUL_LOHI.
+  // Ensure that ADDE is from high result of ISD::xMUL_LOHI.
   if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1)))
     return SDValue();
 
@@ -9775,6 +9894,11 @@
   if (!LoMul)
     return SDValue();
 
+  // If HiAdd is a predecessor of ADDC, the replacement below will create a
+  // cycle.
+  if (AddcNode->isPredecessorOf(HiAdd->getNode()))
+    return SDValue();
+
   // Create the merged node.
   SelectionDAG &DAG = DCI.DAG;
 
@@ -9877,8 +10001,22 @@
   return SDValue();
 }
 
-static SDValue PerformAddcSubcCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue PerformAddcSubcCombine(SDNode *N,
+                                      TargetLowering::DAGCombinerInfo &DCI,
                                       const ARMSubtarget *Subtarget) {
+  SelectionDAG &DAG(DCI.DAG);
+
+  if (N->getOpcode() == ARMISD::ADDC) {
+    // (ADDC (ADDE 0, 0, C), -1) -> C
+    SDValue LHS = N->getOperand(0);
+    SDValue RHS = N->getOperand(1);
+    if (LHS->getOpcode() == ARMISD::ADDE &&
+        isNullConstant(LHS->getOperand(0)) &&
+        isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
+      return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
+    }
+  }
+
   if (Subtarget->isThumb1Only()) {
     SDValue RHS = N->getOperand(1);
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
@@ -11767,6 +11905,16 @@
   return SDValue();
 }
 
+/// Return a pointer to the value of \p V if it is a constant power of two,
+/// or nullptr otherwise.
+static const APInt *isPowerOf2Constant(SDValue V) {
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+  if (!C)
+    return nullptr;
+  const APInt *CV = &C->getAPIntValue();
+  return CV->isPowerOf2() ? CV : nullptr;
+}
+
 SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
   // If we have a CMOV, OR and AND combination such as:
   //   if (x & CN)
@@ -11795,8 +11943,8 @@
   SDValue And = CmpZ->getOperand(0);
   if (And->getOpcode() != ISD::AND)
     return SDValue();
-  ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(And->getOperand(1));
-  if (!AndC || !AndC->getAPIntValue().isPowerOf2())
+  const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
+  if (!AndC)
     return SDValue();
   SDValue X = And->getOperand(0);
 
@@ -11836,7 +11984,7 @@
   SDValue V = Y;
   SDLoc dl(X);
   EVT VT = X.getValueType();
-  unsigned BitInX = AndC->getAPIntValue().logBase2();
+  unsigned BitInX = AndC->logBase2();
 
   if (BitInX != 0) {
     // We must shift X first.
@@ -11997,7 +12145,7 @@
   case ISD::XOR:        return PerformXORCombine(N, DCI, Subtarget);
   case ISD::AND:        return PerformANDCombine(N, DCI, Subtarget);
   case ARMISD::ADDC:
-  case ARMISD::SUBC:    return PerformAddcSubcCombine(N, DCI.DAG, Subtarget);
+  case ARMISD::SUBC:    return PerformAddcSubcCombine(N, DCI, Subtarget);
   case ARMISD::SUBE:    return PerformAddeSubeCombine(N, DCI.DAG, Subtarget);
   case ARMISD::BFI:     return PerformBFICombine(N, DCI);
   case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
@@ -12693,10 +12841,17 @@
   case ARMISD::ADDE:
   case ARMISD::SUBC:
   case ARMISD::SUBE:
-    // These nodes' second result is a boolean
-    if (Op.getResNo() == 0)
-      break;
-    Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+    // Special cases when we convert a carry to a boolean.
+    if (Op.getResNo() == 0) {
+      SDValue LHS = Op.getOperand(0);
+      SDValue RHS = Op.getOperand(1);
+      // (ADDE 0, 0, C) will give us a single bit.
+      if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
+          isNullConstant(RHS)) {
+        Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+        return;
+      }
+    }
     break;
   case ARMISD::CMOV: {
     // Bits are known zero/one if known on the LHS and RHS.
Index: test/CodeGen/ARM/intrinsics-overflow.ll
===================================================================
--- test/CodeGen/ARM/intrinsics-overflow.ll
+++ test/CodeGen/ARM/intrinsics-overflow.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -mtriple=arm-linux -mcpu=generic | FileCheck %s
+; RUN: llc < %s -mtriple=arm-linux -mcpu=generic -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
+; RUN: llc < %s -mtriple=thumbv6m-eabi -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=THUMBV6
+; RUN: llc < %s -mtriple=thumbv7-eabi -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=THUMBV7
 
 define i32 @uadd_overflow(i32 %a, i32 %b) #0 {
   %sadd = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
@@ -7,10 +9,19 @@
   ret i32 %2
 
   ; CHECK-LABEL: uadd_overflow:
-  ; CHECK: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
-  ; CHECK: mov r[[R1]], #1
-  ; CHECK: cmp r[[R2]], r[[R0]]
-  ; CHECK: movhs r[[R1]], #0
+
+  ; ARM: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
+  ; ARM: mov r[[R2:[0-9]+]], #0
+  ; ARM: adc r[[R0]], r[[R2]], #0
+
+  ; THUMBV6: movs r[[R2:[0-9]+]], #0
+  ; THUMBV6: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
+  ; THUMBV6: adcs r[[R2]], r[[R2]]
+  ; THUMBV6: mov r[[R0]], r[[R2]]
+
+  ; THUMBV7: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
+  ; THUMBV7: mov.w r[[R2:[0-9]+]], #0
+  ; THUMBV7: adc r[[R0]], r[[R2]], #0
 }
 
 
@@ -21,10 +32,26 @@
   ret i32 %2
 
   ; CHECK-LABEL: sadd_overflow:
-  ; CHECK: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
-  ; CHECK: mov r[[R1]], #1
-  ; CHECK: cmp r[[R2]], r[[R0]]
-  ; CHECK: movvc r[[R1]], #0
+
+  ; ARM: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
+  ; ARM: mov r[[R1]], #1
+  ; ARM: cmp r[[R2]], r[[R0]]
+  ; ARM: movvc r[[R1]], #0
+
+  ; THUMBV6: mov r[[R2:[0-9]+]], r[[R0:[0-9]+]]
+  ; THUMBV6: adds r[[R3:[0-9]+]], r[[R2]], r[[R1:[0-9]+]]
+  ; THUMBV6: movs r[[R0]], #0
+  ; THUMBV6: movs r[[R1]], #1
+  ; THUMBV6: cmp r[[R3]], r[[R2]]
+  ; THUMBV6: bvc .L[[LABEL:.*]]
+  ; THUMBV6: mov r[[R0]], r[[R1]]
+  ; THUMBV6: .L[[LABEL]]:
+
+  ; THUMBV7: movs r[[R1]], #1
+  ; THUMBV7: cmp r[[R2]], r[[R0]]
+  ; THUMBV7: it vc
+  ; THUMBV7: movvc r[[R1]], #0
+  ; THUMBV7: mov r[[R0]], r[[R1]]
 }
 
 define i32 @usub_overflow(i32 %a, i32 %b) #0 {
@@ -34,9 +61,26 @@
   ret i32 %2
 
   ; CHECK-LABEL: usub_overflow:
-  ; CHECK: mov r[[R2]], #1
-  ; CHECK: cmp r[[R0]], r[[R1]]
-  ; CHECK: movhs r[[R2]], #0
+
+  ; ARM: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
+  ; ARM: mov r[[R2:[0-9]+]], #0
+  ; ARM: adc r[[R0]], r[[R2]], #0
+  ; ARM: rsb r[[R0]], r[[R0]], #1
+
+  ; THUMBV6: movs r[[R2:[0-9]+]], #0
+  ; THUMBV6: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
+  ; THUMBV6: adcs r[[R2]], r[[R2]]
+  ; THUMBV6: movs r[[R0]], #1
+  ; THUMBV6: subs r[[R0]], r[[R0]], r[[R2]]
+
+  ; THUMBV7: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
+  ; THUMBV7: mov.w r[[R2:[0-9]+]], #0
+  ; THUMBV7: adc r[[R0]], r[[R2]], #0
+  ; THUMBV7: rsb.w r[[R0]], r[[R0]], #1
+
+  ; We should know that the overflow is just 1 bit,
+  ; no need to clear any other bit
+  ; CHECK-NOT: and
 }
 
 define i32 @ssub_overflow(i32 %a, i32 %b) #0 {
@@ -46,9 +90,23 @@
   ret i32 %2
 
   ; CHECK-LABEL: ssub_overflow:
-  ; CHECK: mov r[[R2]], #1
-  ; CHECK: cmp r[[R0]], r[[R1]]
-  ; CHECK: movvc r[[R2]], #0
+
+  ; ARM: mov r[[R2]], #1
+  ; ARM: cmp r[[R0]], r[[R1]]
+  ; ARM: movvc r[[R2]], #0
+
+  ; THUMBV6: movs r[[R0]], #0
+  ; THUMBV6: movs r[[R3:[0-9]+]], #1
+  ; THUMBV6: cmp r[[R2]], r[[R1:[0-9]+]]
+  ; THUMBV6: bvc .L[[LABEL:.*]]
+  ; THUMBV6: mov r[[R0]], r[[R3]]
+  ; THUMBV6: .L[[LABEL]]:
+
+  ; THUMBV7: movs r[[R2:[0-9]+]], #1
+  ; THUMBV7: cmp r[[R0:[0-9]+]], r[[R1:[0-9]+]]
+  ; THUMBV7: it vc
+  ; THUMBV7: movvc r[[R2]], #0
+  ; THUMBV7: mov r[[R0]], r[[R2]]
 }
 
 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
Index: test/CodeGen/ARM/pr34045.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/pr34045.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -mtriple thumbv7 | FileCheck %s
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+define hidden void @bn_mul_comba8(i32* nocapture %r, i32* nocapture readonly %a, i32* nocapture readonly %b) local_unnamed_addr {
+entry:
+; The real purpose of this test is to check that no cycle is introduced in the
+; DAG; as a sanity check we also expect two umull and one umlal in the output.
+; CHECK: umull
+; CHECK: umull
+; CHECK: umlal
+  %0 = load i32, i32* %a, align 4
+  %conv = zext i32 %0 to i64
+  %1 = load i32, i32* %b, align 4
+  %conv2 = zext i32 %1 to i64
+  %mul = mul nuw i64 %conv2, %conv
+  %shr = lshr i64 %mul, 32
+  %2 = load i32, i32* %a, align 4
+  %conv13 = zext i32 %2 to i64
+  %3 = load i32, i32* undef, align 4
+  %conv15 = zext i32 %3 to i64
+  %mul16 = mul nuw i64 %conv15, %conv13
+  %add18 = add i64 %mul16, %shr
+  %shr20 = lshr i64 %add18, 32
+  %conv21 = trunc i64 %shr20 to i32
+  %4 = load i32, i32* undef, align 4
+  %conv34 = zext i32 %4 to i64
+  %5 = load i32, i32* %b, align 4
+  %conv36 = zext i32 %5 to i64
+  %mul37 = mul nuw i64 %conv36, %conv34
+  %conv38 = and i64 %add18, 4294967295
+  %add39 = add i64 %mul37, %conv38
+  %shr41 = lshr i64 %add39, 32
+  %conv42 = trunc i64 %shr41 to i32
+  %add43 = add i32 %conv42, %conv21
+  %cmp44 = icmp ult i32 %add43, %conv42
+  %c1.1 = zext i1 %cmp44 to i32
+  %add65 = add i32 0, %c1.1
+  %add86 = add i32 %add65, 0
+  %add107 = add i32 %add86, 0
+  %conv124 = zext i32 %add107 to i64
+  %add125 = add i64 0, %conv124
+  %conv145 = and i64 %add125, 4294967295
+  %add146 = add i64 %conv145, 0
+  %conv166 = and i64 %add146, 4294967295
+  %add167 = add i64 %conv166, 0
+  %conv187 = and i64 %add167, 4294967295
+  %add188 = add i64 %conv187, 0
+  %conv189 = trunc i64 %add188 to i32
+  %arrayidx200 = getelementptr inbounds i32, i32* %r, i32 3
+  store i32 %conv189, i32* %arrayidx200, align 4
+  ret void
+}
+