diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -938,7 +938,6 @@ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const; SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp, bool OverrideNEON = false) const; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -902,6 +902,7 @@ setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::VECREDUCE_ADD); + setTargetDAGCombine(ISD::STEP_VECTOR); setTargetDAGCombine(ISD::GlobalAddress); @@ -1149,7 +1150,6 @@ setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); - setOperationAction(ISD::STEP_VECTOR, VT, Custom); setOperationAction(ISD::UMUL_LOHI, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); @@ -4474,8 +4474,6 @@ return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::SPLAT_VECTOR: return LowerSPLAT_VECTOR(Op, DAG); - case ISD::STEP_VECTOR: - return LowerSTEP_VECTOR(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); case ISD::INSERT_SUBVECTOR: @@ -9160,20 +9158,6 @@ return GenerateTBL(Op, ShuffleMask, DAG); } -SDValue AArch64TargetLowering::LowerSTEP_VECTOR(SDValue Op, - SelectionDAG &DAG) const { - SDLoc dl(Op); - EVT VT = Op.getValueType(); - assert(VT.isScalableVector() && - "Only expect scalable vectors for 
STEP_VECTOR"); - assert(VT.getScalarType() != MVT::i1 && - "Vectors of i1 types not supported for STEP_VECTOR"); - - SDValue StepVal = Op.getOperand(0); - SDValue Zero = DAG.getConstant(0, dl, StepVal.getValueType()); - return DAG.getNode(AArch64ISD::INDEX_VECTOR, dl, VT, Zero, StepVal); -} - SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -9259,9 +9243,7 @@ SDValue SplatOne = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, One); // create the vector 0,1,0,1,... - SDValue Zero = DAG.getConstant(0, DL, MVT::i64); - SDValue SV = DAG.getNode(AArch64ISD::INDEX_VECTOR, - DL, MVT::nxv2i64, Zero, One); + SDValue SV = DAG.getNode(ISD::STEP_VECTOR, DL, MVT::nxv2i64, One); SV = DAG.getNode(ISD::AND, DL, MVT::nxv2i64, SV, SplatOne); // create the vector idx64,idx64+1,idx64,idx64+1,... @@ -13670,8 +13652,19 @@ Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2); } - return DAG.getNode(AArch64ISD::INDEX_VECTOR, DL, N->getValueType(0), - Op1, Op2); + if (isa<ConstantSDNode>(Op2)) { + // Lower index_vector(base, step) to mul(step_vector(1), splat(step)) + splat(base). 
+ SDValue One = DAG.getConstant(1, DL, Op2.getValueType()); + SDValue StepVector = + DAG.getNode(ISD::STEP_VECTOR, DL, N->getValueType(0), One); + SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2); + SDValue Mul = + DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step); + SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1); + return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base); + } else + return DAG.getNode(AArch64ISD::INDEX_VECTOR, DL, N->getValueType(0), Op1, + Op2); } static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG) { @@ -15461,6 +15454,19 @@ DAG.getConstant(MinOffset, DL, MVT::i64)); } +static SDValue performStepVectorCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { + if (!DCI.isAfterLegalizeDAG()) + return SDValue(); + + SDLoc DL(N); + EVT VT = N->getValueType(0); + SDValue StepVal = N->getOperand(0); + SDValue Zero = DAG.getConstant(0, DL, StepVal.getValueType()); + return DAG.getNode(AArch64ISD::INDEX_VECTOR, DL, VT, Zero, StepVal); +} + // Turns the vector of indices into a vector of byte offstes by scaling Offset // by (BitWidth / 8). 
static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset, @@ -15975,6 +15981,8 @@ return performExtractVectorEltCombine(N, DAG); case ISD::VECREDUCE_ADD: return performVecReduceAddCombine(N, DCI.DAG, Subtarget); + case ISD::STEP_VECTOR: + return performStepVectorCombine(N, DCI, DAG); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll @@ -46,6 +46,18 @@ ret %out } +define <vscale x 8 x i16> @index_ii_range_combine(i16 %a) { +; CHECK-LABEL: index_ii_range_combine: +; CHECK: index z0.h, #2, #8 +; CHECK-NEXT: ret + %val = insertelement <vscale x 8 x i16> poison, i16 2, i32 0 + %val1 = shufflevector <vscale x 8 x i16> %val, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %val2 = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 0, i16 2) + %val3 = shl <vscale x 8 x i16> %val2, %val1 + %out = add <vscale x 8 x i16> %val3, %val1 + ret <vscale x 8 x i16> %out +} + ; ; INDEX (IMMEDIATE, SCALAR) ;