diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -52,6 +52,10 @@ ADC, SBC, // adc, sbc instructions + // Arithmetic instructions + SDIV_PRED, + UDIV_PRED, + // Arithmetic instructions which write flags. ADDS, SUBS, @@ -781,6 +785,8 @@ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDIV(SDValue Op, SelectionDAG &DAG, + unsigned NewOp) const; SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -883,8 +883,11 @@ // splat of 0 or undef) once vector selects supported in SVE codegen. See // D68877 for more details. for (MVT VT : MVT::integer_scalable_vector_valuetypes()) { - if (isTypeLegal(VT)) + if (isTypeLegal(VT)) { setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + setOperationAction(ISD::SDIV, VT, Custom); + setOperationAction(ISD::UDIV, VT, Custom); + } } setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); @@ -1280,6 +1283,8 @@ case AArch64ISD::CSINC: return "AArch64ISD::CSINC"; case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ"; + case AArch64ISD::SDIV_PRED: return "AArch64ISD::SDIV_PRED"; + case AArch64ISD::UDIV_PRED: return "AArch64ISD::UDIV_PRED"; case AArch64ISD::ADC: return "AArch64ISD::ADC"; case AArch64ISD::SBC: return "AArch64ISD::SBC"; case AArch64ISD::ADDS: return "AArch64ISD::ADDS"; @@ -3342,6 +3347,10 @@ return LowerSPLAT_VECTOR(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); + case ISD::SDIV: + return LowerDIV(Op, DAG, AArch64ISD::SDIV_PRED); + case ISD::UDIV: + return LowerDIV(Op, DAG, AArch64ISD::UDIV_PRED); case ISD::SRA: case ISD::SRL: case ISD::SHL: @@ -7648,6 +7657,23 @@ return DAG.getNode(ISD::BITCAST, DL, VT, TBL); } +SDValue AArch64TargetLowering::LowerDIV(SDValue Op, + SelectionDAG &DAG, + unsigned NewOp) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + + assert(Op.getOperand(0).getValueType().isScalableVector() && + Op.getOperand(1).getValueType().isScalableVector() && + "Only scalable vectors are supported"); + + auto PredTy = VT.getVectorVT(*DAG.getContext(), MVT::i1, + VT.getVectorNumElements(), true); + SDValue Mask = getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all); + + return DAG.getNode(NewOp, DL, VT, Mask, Op.getOperand(0), Op.getOperand(1)); +} + static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, APInt &UndefBits) { EVT VT = BVN->getValueType(0); @@ -11359,6 +11385,12 @@ N->getOperand(1)); case Intrinsic::aarch64_sve_ext: return LowerSVEIntrinsicEXT(N, DAG); + case Intrinsic::aarch64_sve_sdiv: + return DAG.getNode(AArch64ISD::SDIV_PRED, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), N->getOperand(3)); + case Intrinsic::aarch64_sve_udiv: + return DAG.getNode(AArch64ISD::UDIV_PRED, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_sel: return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -145,6 +145,14 @@ def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>; def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>; +def SDT_AArch64DIV : SDTypeProfile<1, 3, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, + SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3> +]>; + +def AArch64sdiv_pred : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64DIV>; +def AArch64udiv_pred : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64DIV>; + def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>; def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>; @@ -239,8 +247,8 @@ def : Pat<(mul nxv2i64:$Op1, nxv2i64:$Op2), (MUL_ZPmZ_D (PTRUE_D 31), $Op1, $Op2)>; - defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", int_aarch64_sve_sdiv>; - defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", int_aarch64_sve_udiv>; + defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", AArch64sdiv_pred>; + defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", AArch64udiv_pred>; defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", int_aarch64_sve_sdivr>; defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", int_aarch64_sve_udivr>; diff --git a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll @@ -0,0 +1,45 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; SDIV +; + +define @sdiv_i32( %a, %b) { +; CHECK-LABEL: @sdiv_i32 +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: sdiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %div = sdiv %a, %b + ret %div +} + +define @sdiv_i64( %a, %b) { +; CHECK-LABEL: @sdiv_i64 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %div = sdiv %a, %b + ret %div +} + +; +; UDIV +; + +define @udiv_i32( %a, %b) { +; CHECK-LABEL: @udiv_i32 +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: udiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %div = udiv %a, %b + ret %div +} + +define @udiv_i64( %a, %b) { +; CHECK-LABEL: @udiv_i64 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: udiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %div = udiv %a, %b + ret %div +}