Index: lib/Target/ARM/ARMISelLowering.h =================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -73,6 +73,8 @@ CMOV, // ARM conditional move instructions. SSAT, // Signed saturation + QADD, // Signed saturating add + QSUB, // Signed saturating sub BCC_i64, Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -1146,6 +1146,7 @@ setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::XOR); + setTargetDAGCombine(ISD::SELECT); if (Subtarget->hasV6Ops()) setTargetDAGCombine(ISD::SRL); @@ -1264,6 +1265,8 @@ case ARMISD::ADDE: return "ARMISD::ADDE"; case ARMISD::SUBC: return "ARMISD::SUBC"; case ARMISD::SUBE: return "ARMISD::SUBE"; + case ARMISD::QADD: return "ARMISD::QADD"; + case ARMISD::QSUB: return "ARMISD::QSUB"; case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; @@ -1470,6 +1473,14 @@ return false; } +static bool isSRA31(const SDValue &Op) { + if (Op.getOpcode() != ISD::SRA) + return false; + if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + return Const->getZExtValue() == 31; + return false; +} + static bool isSHL16(const SDValue &Op) { if (Op.getOpcode() != ISD::SHL) return false; @@ -3975,6 +3986,185 @@ return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); } +static bool is64bitArith(SDValue ArithE, SDValue ArithC) { + if ((ArithE.getOpcode() == ISD::ADDE && ArithC.getOpcode() == ISD::ADDC) || + (ArithE.getOpcode() == ISD::SUBE && ArithC.getOpcode() == ISD::SUBC)) { + + if (ArithC->getGluedUser() != ArithE.getNode()) + return false; + + SDValue SRA0 = ArithE.getOperand(0); + SDValue SRA1 = ArithE.getOperand(1); + + if (!isSRA31(SRA0) || !isSRA31(SRA1)) + return false; + + if (ArithC.getOperand(0) != SRA0.getOperand(0) && + ArithC.getOperand(1) != 
SRA0.getOperand(0)) + return false; + + if (ArithC.getOperand(0) != SRA1.getOperand(0) && + ArithC.getOperand(1) != SRA1.getOperand(0)) + return false; + + return true; + } + return false; +} + +static bool isBoolMask(SDValue Op) { + if (Op.getOpcode() != ISD::AND) + return false; + + if (auto *Mask = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + return Mask->getZExtValue() == 1; + + return false; +} + +static bool isSelectInt32Limit(SDValue, bool, bool, int, SDValue&); + +static bool SearchForCarrySet(SDValue Subc, bool isAdd, bool isMax, int Depth, + SDValue CarryUse, SDValue &CarrySet) { + DEBUG(dbgs() << "-- SearchForCarrySet:\n"; + Subc.dump()); + int MinLimit = std::numeric_limits<int>::min(); + int MaxLimit = std::numeric_limits<int>::max(); + int Limit = isMax ? MaxLimit : MinLimit; + unsigned ImmIdx = isMax ? 1 : 0; + unsigned CarryOrSelectIdx = isMax ? 0 : 1; + unsigned CarrySetOpc = isAdd ? ISD::ADDC : ISD::SUBC; + + // Minimum, search for: subc(INT32_MIN, carry_set|max_select()) + // Maximum, search for: subc(carry_set|min_select(), INT32_MAX) + + if (Subc.getOpcode() != ISD::SUBC) + return false; + + if (auto Const = dyn_cast<ConstantSDNode>(Subc.getOperand(ImmIdx))) { + if (Const->getSExtValue() != Limit) + return false; + } else + return false; + + SDValue CarryOrSelect = Subc.getOperand(CarryOrSelectIdx); + unsigned Opcode = CarryOrSelect.getOpcode(); + + // If we find a select here, it needs to be the node that is performing the + // opposite check to the original. 
+ if ((Opcode == ISD::SELECT && + isSelectInt32Limit(CarryOrSelect, isAdd, !isMax, Depth, CarrySet))) { + return true; + } else if (Opcode == CarrySetOpc) { + CarrySet = CarryOrSelect; + DEBUG(dbgs() << "-- Found CarrySet:\n"; + CarrySet.dump()); + return is64bitArith(CarryUse, CarrySet); + } + return false; +} + +static bool SearchForCarryUse(SDValue SetCCE, bool isAdd, bool isMax, int Depth, + SDValue &CarrySet) { + DEBUG(dbgs() << "-- At Depth " << Depth << ", SearchForCarryUse:\n"; + SetCCE.dump()); + // The second call to SearchForCarryUse should discover the adde node. + if (Depth > 1) + return false; + + unsigned ImmIdx = isMax ? 1 : 0; + int FirstImmVal = isMax ? 0 : -1; + int SecondImmVal = isMax ? -1 : 0; + unsigned CarryOrSelectIdx = isMax ? 0 : 1; + unsigned CarryUseOpc = isAdd ? ISD::ADDE : ISD::SUBE; + // Minimum: + // setcce(-1, select|adde(), subc(INT32_MIN, max_select()), setlt) + // Maximum: + // setcce(select|adde(), 0, subc(min_select(), INT32_MAX), setlt) + + if (SetCCE.getOpcode() != ISD::SETCCE) + return false; + if (cast<CondCodeSDNode>(SetCCE.getOperand(3))->get() != ISD::SETLT) + return false; + + if (auto Const = dyn_cast<ConstantSDNode>(SetCCE.getOperand(ImmIdx))) { + if (Const->getSExtValue() != FirstImmVal) + return false; + } else + return false; + + SDValue CarryUse; + unsigned Opcode = SetCCE.getOperand(CarryOrSelectIdx).getOpcode(); + if (Depth == 1) { + if (Opcode == CarryUseOpc) + CarryUse = SetCCE.getOperand(CarryOrSelectIdx); + else + return false; + } else if (Opcode != ISD::SELECT) { + return false; + } else { + // Search for the opposite limit check. 
+ SDValue Select = SetCCE.getOperand(CarryOrSelectIdx); + SDValue And = Select.getOperand(0); + if (!isBoolMask(And)) + return false; + if (!SearchForCarryUse(And.getOperand(0), isAdd, !isMax, ++Depth, CarrySet)) + return false; + + if (Select.getOperand(1).getOpcode() == CarryUseOpc) + CarryUse = Select.getOperand(1); + else + return false; + + if (auto Const = dyn_cast<ConstantSDNode>(Select.getOperand(2))) { + if (Const->getSExtValue() != SecondImmVal) + return false; + } else + return false; + } + + // We've found the adde, now find the subc addc and ensure that both are + // using the same adde node. + DEBUG(dbgs() << "-- Found CarryUse:\n"; + CarryUse.dump()); + return SearchForCarrySet(SetCCE.getOperand(2), isAdd, isMax, Depth, CarryUse, + CarrySet); +} + +static bool isSelectInt32Limit(SDValue Op, bool isAdd, bool isMax, int Depth, + SDValue &Addc) { + DEBUG(dbgs() << "-- isSelectInt32Limit?\n"; + Op.dump()); + // Minimum: + // select (and setcce(-1, select|carry_use(), subc(INT32_MIN, max_select()), + // setlt), 1), max_select(), INT32_MIN) + // Maximum: + // select (and setcce(select|carry_use(), 0, subc(min_select(), INT32_MAX), + // setlt), 1), min_select(), INT32_MAX) + int32_t Limit = + isMax ? 
std::numeric_limits<int32_t>::max() : std::numeric_limits<int32_t>::min(); + + if (isMax) + DEBUG(dbgs() << "-- Looking for Max select\n"); + else + DEBUG(dbgs() << "-- Looking for Min select\n"); + + if (Op.getOpcode() != ISD::SELECT) + return false; + if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(2))) { + if (Const->getSExtValue() != Limit) + return false; + } else { + return false; + } + + SDValue And = Op.getOperand(0); + if (!isBoolMask(And)) + return false; + + return SearchForCarryUse(And.getOperand(0), isAdd, isMax, Depth, Addc); +} + SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); @@ -4255,6 +4445,7 @@ uint64_t SatConstant; if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) && isSaturatingConditional(Op, SatValue, SatConstant)) + return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue, DAG.getConstant(countTrailingOnes(SatConstant), dl, VT)); @@ -7674,6 +7865,8 @@ } SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + DEBUG(dbgs() << "LowerOperation:\n"; + Op.dump()); switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); @@ -11984,8 +12177,38 @@ return Res; } +static SDValue PerformSELECTCombine(SDNode *N, + ARMTargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *ST) { + if (ST->isThumb()) { + if (!ST->hasDSP()) + return SDValue(); + } else if (!ST->hasV5TEOps()) + return SDValue(); + + SDValue Carry; + SDValue Op = SDValue(N, 0); + if (isSelectInt32Limit(Op, true, true, 0, Carry) || + isSelectInt32Limit(Op, true, false, 0, Carry) || + isSelectInt32Limit(Op, false, true, 0, Carry) || + isSelectInt32Limit(Op, false, false, 0, Carry)) + { + + SDLoc dl(N); + unsigned Opcode = Carry.getOpcode() == ISD::ADDC ? 
+ ARMISD::QADD : ARMISD::QSUB; + SDValue Q = DCI.DAG.getNode(Opcode, dl, MVT::i32, + Carry.getOperand(0), Carry.getOperand(1)); + DCI.DAG.ReplaceAllUsesOfValueWith(Op, Q); + return Op; + } + return SDValue(); +} + SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { + DEBUG(dbgs() << "\nPerformDAGCombine on:\n"; + N->dump()); switch (N->getOpcode()) { default: break; case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget); @@ -11996,6 +12219,7 @@ case ISD::OR: return PerformORCombine(N, DCI, Subtarget); case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); + case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget); case ARMISD::ADDC: case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI.DAG, Subtarget); case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI.DAG, Subtarget); Index: lib/Target/ARM/ARMInstrInfo.td =================================================================== --- lib/Target/ARM/ARMInstrInfo.td +++ lib/Target/ARM/ARMInstrInfo.td @@ -139,6 +139,9 @@ def ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>; +def ARMqadd : SDNode<"ARMISD::QADD", SDTIntBinOp, []>; +def ARMqsub : SDNode<"ARMISD::QSUB", SDTIntBinOp, []>; + def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; @@ -3691,6 +3694,11 @@ [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))]>; } +def : Pat<(ARMqadd GPRnopc:$Rm, GPRnopc:$Rn), + (QADD GPRnopc:$Rm, GPRnopc:$Rn)>; +def : Pat<(ARMqsub GPRnopc:$Rm, GPRnopc:$Rn), + (QSUB GPRnopc:$Rm, GPRnopc:$Rn)>; + def UQADD16 : AAIIntrinsic<0b01100110, 0b11110001, "uqadd16", int_arm_uqadd16>; def UQADD8 : AAIIntrinsic<0b01100110, 0b11111001, "uqadd8", int_arm_uqadd8>; def UQSUB16 : AAIIntrinsic<0b01100110, 0b11110111, "uqsub16", int_arm_uqsub16>; Index: test/CodeGen/ARM/saturating-arith.ll =================================================================== --- /dev/null +++ 
test/CodeGen/ARM/saturating-arith.ll @@ -0,0 +1,143 @@ +; RUN: llc -mtriple=armv4t %s -o - | FileCheck %s -check-prefix=NO-DSP +; RUN: llc -mtriple=armv5 %s -o - | FileCheck %s -check-prefix=NO-DSP +; RUN: llc -mtriple=armv5te %s -o - | FileCheck %s +; RUN: llc -mtriple=armv6 %s -o - | FileCheck %s +; RUN: llc -mtriple=armv7 %s -o - | FileCheck %s +; RUN: llc -mtriple=armv8 %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv6t2 %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv6m %s -o - | FileCheck %s -check-prefix=NO-DSP +; RUN: llc -mtriple=thumbv7 %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv7m %s -o - | FileCheck %s -check-prefix=NO-DSP +; RUN: llc -mtriple=thumbv7em %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8 %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8m.mainline %s -o - | FileCheck %s -check-prefix=NO-DSP +; RUN: llc -mtriple=thumbv8m.baseline %s -o - | FileCheck %s -check-prefix=NO-DSP +; RUN: llc -march=thumb -mcpu=cortex-m33 %s -o - | FileCheck %s + +; CHECK-LABEL: test_sat_add0: +; CHECK-LABEL: BB#0: +; CHECK-NEXT: qadd +; NO-DSP-NOT: qadd +define arm_aapcscc i32 @test_sat_add0(i32 %a, i32 %b) { +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %add = add nsw i64 %conv1, %conv + %0 = icmp sgt i64 %add, -2147483648 + %cond = select i1 %0, i64 %add, i64 -2147483648 + %1 = icmp slt i64 %cond, 2147483647 + %cond8.off014 = select i1 %1, i64 %cond, i64 2147483647 + %2 = trunc i64 %cond8.off014 to i32 + ret i32 %2 +} + +; CHECK-LABEL: test_sat_add1: +; CHECK-LABEL: BB#0: +; CHECK-NEXT: qadd +; NO-DSP-NOT: qadd +define arm_aapcscc i32 @test_sat_add1(i32 %a, i32 %b) { +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %add = add nsw i64 %conv1, %conv + %0 = icmp slt i64 %add, 2147483647 + %cond = select i1 %0, i64 %add, i64 2147483647 + %1 = icmp sgt i64 %cond, -2147483648 + %cond8.off014 = select i1 %1, i64 %cond, i64 -2147483648 + %2 = trunc i64 %cond8.off014 to i32 + ret i32 %2 +} + +; CHECK-LABEL: 
test_sat_add_neg0: +; CHECK-LABEL: BB#0: +; CHECK-NOT: qadd +; NO-DSP-NOT: qadd +define arm_aapcscc i32 @test_sat_add_neg0(i32 %a, i32 %b) { +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %add = add nsw i64 %conv1, %conv + %0 = icmp slt i64 %add, 2147483647 + %cond = select i1 %0, i64 %add, i64 2147483647 + %conv3 = trunc i64 %cond to i32 + ret i32 %conv3 +} + +; CHECK-LABEL: test_sat_add_neg1: +; CHECK-LABEL: BB#0: +; CHECK-NOT: qadd +; NO-DSP-NOT: qadd +define arm_aapcscc i32 @test_sat_add_neg1(i32 %a, i32 %b) { +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %add = add nsw i64 %conv1, %conv + %0 = icmp sgt i64 %add, -2147483648 + %cond = select i1 %0, i64 %add, i64 -2147483648 + %conv3 = trunc i64 %cond to i32 + ret i32 %conv3 +} + +; CHECK-LABEL: test_sat_sub0: +; CHECK-LABEL: BB#0: +; CHECK-NEXT: qsub +; NO-DSP-NOT: qsub +define arm_aapcscc i32 @test_sat_sub0(i32 %a, i32 %b) { +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %sub = sub nsw i64 %conv, %conv1 + %0 = icmp sgt i64 %sub, -2147483648 + %cond = select i1 %0, i64 %sub, i64 -2147483648 + %1 = icmp slt i64 %cond, 2147483647 + %cond8.off014 = select i1 %1, i64 %cond, i64 2147483647 + %2 = trunc i64 %cond8.off014 to i32 + ret i32 %2 +} + +; CHECK-LABEL: test_sat_sub1: +; CHECK-LABEL: BB#0: +; CHECK-NEXT: qsub +; NO-DSP-NOT: qsub +define arm_aapcscc i32 @test_sat_sub1(i32 %a, i32 %b) { +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %sub = sub nsw i64 %conv, %conv1 + %0 = icmp slt i64 %sub, 2147483647 + %cond = select i1 %0, i64 %sub, i64 2147483647 + %1 = icmp sgt i64 %cond, -2147483648 + %cond8.off014 = select i1 %1, i64 %cond, i64 -2147483648 + %2 = trunc i64 %cond8.off014 to i32 + ret i32 %2 +} + +; CHECK-LABEL: test_sat_sub_neg0: +; CHECK-LABEL: BB#0: +; CHECK-NOT: qsub +; NO-DSP-NOT: qsub +define arm_aapcscc i32 @test_sat_sub_neg0(i32 %a, i32 %b) { +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + 
%sub = sub nsw i64 %conv1, %conv + %0 = icmp slt i64 %sub, 2147483647 + %cond = select i1 %0, i64 %sub, i64 2147483647 + %conv3 = trunc i64 %cond to i32 + ret i32 %conv3 +} + +; CHECK-LABEL: test_sat_sub_neg1: +; CHECK-LABEL: BB#0: +; CHECK-NOT: qsub +; NO-DSP-NOT: qsub +define arm_aapcscc i32 @test_sat_sub_neg1(i32 %a, i32 %b) { +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %sub = sub nsw i64 %conv1, %conv + %0 = icmp sgt i64 %sub, -2147483648 + %cond = select i1 %0, i64 %sub, i64 -2147483648 + %conv3 = trunc i64 %cond to i32 + ret i32 %conv3 +}