diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -601,15 +601,12 @@
   /// STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised
   /// of a linear sequence of unsigned values starting from 0 with a step of
-  /// IMM, where IMM must be a vector index constant integer value which must
-  /// fit in the vector element type.
-  /// Note that IMM may be a smaller type than the vector element type, in
-  /// which case the step is implicitly sign-extended to the vector element
-  /// type. IMM may also be a larger type than the vector element type, in
-  /// which case the step is implicitly truncated to the vector element type.
+  /// IMM, where IMM must be a TargetConstant with type equal to the vector
+  /// element type. The arithmetic is performed modulo the bitwidth of the
+  /// element.
+  ///
   /// The operation does not support returning fixed-width vectors or
-  /// non-constant operands. If the sequence value exceeds the limit allowed
-  /// for the element type then the values for those lanes are undefined.
+  /// non-constant operands.
   STEP_VECTOR,
 
   /// MULHU/MULHS - Multiply high - Multiply two integers of type iN,
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -835,7 +835,7 @@
   /// Returns a vector of type ResVT whose elements contain the linear sequence
   ///   <0, Step, Step * 2, Step * 3, ...>
-  SDValue getStepVector(const SDLoc &DL, EVT ResVT, SDValue Step);
+  SDValue getStepVector(const SDLoc &DL, EVT ResVT, APInt StepVal);
 
   /// Returns a vector of type ResVT whose elements contain the linear sequence
   ///   <0, 1, 2, 3, ...>
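For illustration only (this caller is not part of the patch), the updated builder interface takes an APInt whose width matches the result's element type; on scalable types it becomes a STEP_VECTOR whose operand is a TargetConstant of that element type, and lane arithmetic wraps modulo the element width:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Hypothetical caller: builds <0, 3, 6, 9, ...> as an nxv16i8 step vector.
// The step APInt must be exactly as wide as the element type (i8 here).
SDValue buildByteStepVector(SelectionDAG &DAG, const SDLoc &DL) {
  EVT VT = EVT::getVectorVT(*DAG.getContext(), MVT::i8,
                            ElementCount::getScalable(16));
  return DAG.getStepVector(DL, VT, APInt(/*numBits=*/8, /*val=*/3));
}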
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2526,8 +2526,7 @@
       N1.getOpcode() == ISD::STEP_VECTOR) {
     const APInt &C0 = N0->getConstantOperandAPInt(0);
     const APInt &C1 = N1->getConstantOperandAPInt(0);
-    EVT SVT = N0.getOperand(0).getValueType();
-    SDValue NewStep = DAG.getConstant(C0 + C1, DL, SVT);
+    APInt NewStep = C0 + C1;
     return DAG.getStepVector(DL, VT, NewStep);
   }
 
@@ -2537,11 +2536,7 @@
       (N1.getOpcode() == ISD::STEP_VECTOR)) {
     const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
     const APInt &SV1 = N1->getConstantOperandAPInt(0);
-    EVT SVT = N1.getOperand(0).getValueType();
-    assert(N1.getOperand(0).getValueType() ==
-               N0.getOperand(1)->getOperand(0).getValueType() &&
-           "Different operand types of STEP_VECTOR.");
-    SDValue NewStep = DAG.getConstant(SV0 + SV1, DL, SVT);
+    APInt NewStep = SV0 + SV1;
     SDValue SV = DAG.getStepVector(DL, VT, NewStep);
     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
   }
@@ -3576,8 +3571,7 @@
   // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
   if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
-    SDValue NewStep = DAG.getConstant(-N1.getConstantOperandAPInt(0), DL,
-                                      N1.getOperand(0).getValueType());
+    APInt NewStep = -N1.getConstantOperandAPInt(0);
     return DAG.getNode(ISD::ADD, DL, VT, N0,
                        DAG.getStepVector(DL, VT, NewStep));
   }
@@ -3961,9 +3955,7 @@
   if (N0.getOpcode() == ISD::STEP_VECTOR)
     if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
       const APInt &C0 = N0.getConstantOperandAPInt(0);
-      EVT SVT = N0.getOperand(0).getValueType();
-      SDValue NewStep = DAG.getConstant(
-          C0 * MulVal.sextOrTrunc(SVT.getSizeInBits()), SDLoc(N), SVT);
+      APInt NewStep = C0 * MulVal;
       return DAG.getStepVector(SDLoc(N), VT, NewStep);
     }
@@ -8476,10 +8468,10 @@
   if (N0.getOpcode() == ISD::STEP_VECTOR)
     if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
      const APInt &C0 = N0.getConstantOperandAPInt(0);
-      EVT SVT = N0.getOperand(0).getValueType();
-      SDValue NewStep = DAG.getConstant(
-          C0 << ShlVal.sextOrTrunc(SVT.getSizeInBits()), SDLoc(N), SVT);
-      return DAG.getStepVector(SDLoc(N), VT, NewStep);
+      if (ShlVal.ult(C0.getBitWidth())) {
+        APInt NewStep = C0 << ShlVal;
+        return DAG.getStepVector(SDLoc(N), VT, NewStep);
+      }
     }
 
   return SDValue();
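The combines above now operate directly on the element-width APInt, so an overflowing result simply wraps (which is exactly what the new STEP_VECTOR definition allows), and the shl fold bails out for shift amounts that are not valid for the element width. A standalone sketch of the same arithmetic, illustrative rather than the DAGCombiner code itself:

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

// step_vector(C0) + step_vector(C1) folds to step_vector(C0 + C1); the APInt
// addition wraps, e.g. for an i8 element type 100 + 200 == 44 (mod 256).
APInt foldAddOfStepVectors(const APInt &C0, const APInt &C1) {
  assert(C0.getBitWidth() == C1.getBitWidth() && "steps share the element width");
  return C0 + C1;
}

// The shl fold only fires when the splatted shift amount is in range for the
// element type; otherwise the node is left untouched.
bool canFoldShlOfStepVector(const APInt &C0, const APInt &ShlVal) {
  return ShlVal.ult(C0.getBitWidth());
}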
VT!"); diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -4835,6 +4835,14 @@ //SVE Index Generation Group //===----------------------------------------------------------------------===// +def simm5_8b_tgt : TImmLeaf= -16 && (int8_t)Imm < 16; }]>; +def simm5_16b_tgt : TImmLeaf= -16 && (int16_t)Imm < 16; }]>; +def simm5_32b_tgt : TImmLeaf= -16 && (int32_t)Imm < 16; }]>; +def simm5_64b_tgt : TImmLeaf= -16 && (int64_t)Imm < 16; }]>; +def i64imm_32bit_tgt : TImmLeaf(Imm); +}]>; + class sve_int_index_ii sz8_64, string asm, ZPRRegOp zprty, Operand imm_ty> : I<(outs zprty:$Zd), (ins imm_ty:$imm5, imm_ty:$imm5b), @@ -4858,23 +4866,23 @@ def _S : sve_int_index_ii<0b10, asm, ZPR32, simm5_32b>; def _D : sve_int_index_ii<0b11, asm, ZPR64, simm5_64b>; - def : Pat<(nxv16i8 (step_vector simm5_8b:$imm5b)), - (!cast(NAME # "_B") (i32 0), simm5_8b:$imm5b)>; - def : Pat<(nxv8i16 (step_vector simm5_16b:$imm5b)), - (!cast(NAME # "_H") (i32 0), simm5_16b:$imm5b)>; - def : Pat<(nxv4i32 (step_vector simm5_32b:$imm5b)), + def : Pat<(nxv16i8 (step_vector simm5_8b_tgt:$imm5b)), + (!cast(NAME # "_B") (i32 0), (!cast("trunc_imm") $imm5b))>; + def : Pat<(nxv8i16 (step_vector simm5_16b_tgt:$imm5b)), + (!cast(NAME # "_H") (i32 0), (!cast("trunc_imm") $imm5b))>; + def : Pat<(nxv4i32 (step_vector simm5_32b_tgt:$imm5b)), (!cast(NAME # "_S") (i32 0), simm5_32b:$imm5b)>; - def : Pat<(nxv2i64 (step_vector simm5_64b:$imm5b)), + def : Pat<(nxv2i64 (step_vector simm5_64b_tgt:$imm5b)), (!cast(NAME # "_D") (i64 0), simm5_64b:$imm5b)>; // add(step_vector(step), dup(X)) -> index(X, step). - def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b:$imm5b)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), - (!cast(NAME # "_B") simm5_8b:$imm5, simm5_8b:$imm5b)>; - def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b:$imm5b)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), - (!cast(NAME # "_H") simm5_16b:$imm5, simm5_16b:$imm5b)>; - def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b:$imm5b)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), + def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5b)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), + (!cast(NAME # "_B") simm5_8b:$imm5, (!cast("trunc_imm") $imm5b))>; + def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5b)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), + (!cast(NAME # "_H") simm5_16b:$imm5, (!cast("trunc_imm") $imm5b))>; + def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5b)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), (!cast(NAME # "_S") simm5_32b:$imm5, simm5_32b:$imm5b)>; - def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b:$imm5b)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), + def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5b)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), (!cast(NAME # "_D") simm5_64b:$imm5, simm5_64b:$imm5b)>; } @@ -4901,33 +4909,33 @@ def _S : sve_int_index_ir<0b10, asm, ZPR32, GPR32, simm5_32b>; def _D : sve_int_index_ir<0b11, asm, ZPR64, GPR64, simm5_64b>; - def : Pat<(nxv16i8 (step_vector (i32 imm:$imm))), - (!cast(NAME # "_B") (i32 0), (!cast("MOVi32imm") imm:$imm))>; - def : Pat<(nxv8i16 (step_vector (i32 imm:$imm))), - (!cast(NAME # "_H") (i32 0), (!cast("MOVi32imm") imm:$imm))>; - def : Pat<(nxv4i32 (step_vector (i32 imm:$imm))), - (!cast(NAME # "_S") (i32 0), (!cast("MOVi32imm") imm:$imm))>; - def : Pat<(nxv2i64 (step_vector (i64 imm:$imm))), - (!cast(NAME # "_D") (i64 0), 
(!cast("MOVi64imm") imm:$imm))>; - def : Pat<(nxv2i64 (step_vector (i64 !cast("i64imm_32bit"):$imm))), - (!cast(NAME # "_D") (i64 0), (SUBREG_TO_REG (i64 0), (!cast("MOVi32imm") (!cast("trunc_imm") imm:$imm)), sub_32))>; + def : Pat<(nxv16i8 (step_vector i8:$imm)), + (!cast(NAME # "_B") (i32 0), (!cast("MOVi32imm") (!cast("trunc_imm") $imm)))>; + def : Pat<(nxv8i16 (step_vector i16:$imm)), + (!cast(NAME # "_H") (i32 0), (!cast("MOVi32imm") (!cast("trunc_imm") $imm)))>; + def : Pat<(nxv4i32 (step_vector i32:$imm)), + (!cast(NAME # "_S") (i32 0), (!cast("MOVi32imm") $imm))>; + def : Pat<(nxv2i64 (step_vector i64:$imm)), + (!cast(NAME # "_D") (i64 0), (!cast("MOVi64imm") $imm))>; + def : Pat<(nxv2i64 (step_vector i64imm_32bit_tgt:$imm)), + (!cast(NAME # "_D") (i64 0), (SUBREG_TO_REG (i64 0), (!cast("MOVi32imm") (!cast("trunc_imm") $imm)), sub_32))>; // add(step_vector(step), dup(X)) -> index(X, step). - def : Pat<(add (nxv16i8 (step_vector_oneuse (i32 imm:$imm))), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), - (!cast(NAME # "_B") simm5_8b:$imm5, (!cast("MOVi32imm") imm:$imm))>; - def : Pat<(add (nxv8i16 (step_vector_oneuse (i32 imm:$imm))), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), - (!cast(NAME # "_H") simm5_16b:$imm5, (!cast("MOVi32imm") imm:$imm))>; - def : Pat<(add (nxv4i32 (step_vector_oneuse (i32 imm:$imm))), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), - (!cast(NAME # "_S") simm5_32b:$imm5, (!cast("MOVi32imm") imm:$imm))>; - def : Pat<(add (nxv2i64 (step_vector_oneuse (i64 imm:$imm))), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), - (!cast(NAME # "_D") simm5_64b:$imm5, (!cast("MOVi64imm") imm:$imm))>; - def : Pat<(add (nxv2i64 (step_vector_oneuse (i64 !cast("i64imm_32bit"):$imm))), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), - (!cast(NAME # "_D") simm5_64b:$imm5, (SUBREG_TO_REG (i64 0), (!cast("MOVi32imm") (!cast("trunc_imm") imm:$imm)), sub_32))>; + def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$imm)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), + (!cast(NAME # "_B") simm5_8b:$imm5, (!cast("MOVi32imm") (!cast("trunc_imm") $imm)))>; + def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$imm)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), + (!cast(NAME # "_H") simm5_16b:$imm5, (!cast("MOVi32imm") (!cast("trunc_imm") $imm)))>; + def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$imm)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), + (!cast(NAME # "_S") simm5_32b:$imm5, (!cast("MOVi32imm") $imm))>; + def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$imm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), + (!cast(NAME # "_D") simm5_64b:$imm5, (!cast("MOVi64imm") $imm))>; + def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$imm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), + (!cast(NAME # "_D") simm5_64b:$imm5, (SUBREG_TO_REG (i64 0), (!cast("MOVi32imm") (!cast("trunc_imm") $imm)), sub_32))>; // mul(step_vector(1), dup(Y)) -> index(0, Y). 
@@ -4901,33 +4909,33 @@
   def _S : sve_int_index_ir<0b10, asm, ZPR32, GPR32, simm5_32b>;
   def _D : sve_int_index_ir<0b11, asm, ZPR64, GPR64, simm5_64b>;
 
-  def : Pat<(nxv16i8 (step_vector (i32 imm:$imm))),
-            (!cast<Instruction>(NAME # "_B") (i32 0), (!cast<Instruction>("MOVi32imm") imm:$imm))>;
-  def : Pat<(nxv8i16 (step_vector (i32 imm:$imm))),
-            (!cast<Instruction>(NAME # "_H") (i32 0), (!cast<Instruction>("MOVi32imm") imm:$imm))>;
-  def : Pat<(nxv4i32 (step_vector (i32 imm:$imm))),
-            (!cast<Instruction>(NAME # "_S") (i32 0), (!cast<Instruction>("MOVi32imm") imm:$imm))>;
-  def : Pat<(nxv2i64 (step_vector (i64 imm:$imm))),
-            (!cast<Instruction>(NAME # "_D") (i64 0), (!cast<Instruction>("MOVi64imm") imm:$imm))>;
-  def : Pat<(nxv2i64 (step_vector (i64 !cast<ImmLeaf>("i64imm_32bit"):$imm))),
-            (!cast<Instruction>(NAME # "_D") (i64 0), (SUBREG_TO_REG (i64 0), (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") imm:$imm)), sub_32))>;
+  def : Pat<(nxv16i8 (step_vector i8:$imm)),
+            (!cast<Instruction>(NAME # "_B") (i32 0), (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") $imm)))>;
+  def : Pat<(nxv8i16 (step_vector i16:$imm)),
+            (!cast<Instruction>(NAME # "_H") (i32 0), (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") $imm)))>;
+  def : Pat<(nxv4i32 (step_vector i32:$imm)),
+            (!cast<Instruction>(NAME # "_S") (i32 0), (!cast<Instruction>("MOVi32imm") $imm))>;
+  def : Pat<(nxv2i64 (step_vector i64:$imm)),
+            (!cast<Instruction>(NAME # "_D") (i64 0), (!cast<Instruction>("MOVi64imm") $imm))>;
+  def : Pat<(nxv2i64 (step_vector i64imm_32bit_tgt:$imm)),
+            (!cast<Instruction>(NAME # "_D") (i64 0), (SUBREG_TO_REG (i64 0), (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") $imm)), sub_32))>;
 
   // add(step_vector(step), dup(X)) -> index(X, step).
-  def : Pat<(add (nxv16i8 (step_vector_oneuse (i32 imm:$imm))), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
-            (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, (!cast<Instruction>("MOVi32imm") imm:$imm))>;
-  def : Pat<(add (nxv8i16 (step_vector_oneuse (i32 imm:$imm))), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
-            (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, (!cast<Instruction>("MOVi32imm") imm:$imm))>;
-  def : Pat<(add (nxv4i32 (step_vector_oneuse (i32 imm:$imm))), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
-            (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, (!cast<Instruction>("MOVi32imm") imm:$imm))>;
-  def : Pat<(add (nxv2i64 (step_vector_oneuse (i64 imm:$imm))), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
-            (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, (!cast<Instruction>("MOVi64imm") imm:$imm))>;
-  def : Pat<(add (nxv2i64 (step_vector_oneuse (i64 !cast<ImmLeaf>("i64imm_32bit"):$imm))), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
-            (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, (SUBREG_TO_REG (i64 0), (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") imm:$imm)), sub_32))>;
+  def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$imm)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
+            (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") $imm)))>;
+  def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$imm)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
+            (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") $imm)))>;
+  def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$imm)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
+            (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, (!cast<Instruction>("MOVi32imm") $imm))>;
+  def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$imm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
+            (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, (!cast<Instruction>("MOVi64imm") $imm))>;
+  def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$imm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
+            (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, (SUBREG_TO_REG (i64 0), (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") $imm)), sub_32))>;
 
   // mul(step_vector(1), dup(Y)) -> index(0, Y).
-  def : Pat<(mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))),
+  def : Pat<(mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))),
             (!cast<Instruction>(NAME # "_B") (i32 0), GPR32:$Rm)>;
-  def : Pat<(mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),
+  def : Pat<(mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),
             (!cast<Instruction>(NAME # "_H") (i32 0), GPR32:$Rm)>;
   def : Pat<(mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),
             (!cast<Instruction>(NAME # "_S") (i32 0), GPR32:$Rm)>;
@@ -4935,9 +4943,9 @@
             (!cast<Instruction>(NAME # "_D") (i64 0), GPR64:$Rm)>;
 
   // add(mul(step_vector(1), dup(Y)), dup(X)) -> index(X, Y).
-  def : Pat<(add (muloneuseop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
+  def : Pat<(add (muloneuseop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
             (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, GPR32:$Rm)>;
-  def : Pat<(add (muloneuseop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
+  def : Pat<(add (muloneuseop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
             (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, GPR32:$Rm)>;
   def : Pat<(add (muloneuseop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
             (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>;
@@ -4969,13 +4977,13 @@
   def _D : sve_int_index_ri<0b11, asm, ZPR64, GPR64, simm5_64b>;
 
   // add(step_vector(step), dup(X)) -> index(X, step).
-  def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b:$imm5)), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))),
-            (!cast<Instruction>(NAME # "_B") GPR32:$Rm, simm5_8b:$imm5)>;
-  def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b:$imm5)), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),
-            (!cast<Instruction>(NAME # "_H") GPR32:$Rm, simm5_16b:$imm5)>;
-  def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b:$imm5)), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),
+  def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5)), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))),
+            (!cast<Instruction>(NAME # "_B") GPR32:$Rm, (!cast<SDNodeXForm>("trunc_imm") $imm5))>;
+  def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5)), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),
+            (!cast<Instruction>(NAME # "_H") GPR32:$Rm, (!cast<SDNodeXForm>("trunc_imm") $imm5))>;
+  def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5)), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),
             (!cast<Instruction>(NAME # "_S") GPR32:$Rm, simm5_32b:$imm5)>;
-  def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b:$imm5)), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),
+  def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5)), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),
             (!cast<Instruction>(NAME # "_D") GPR64:$Rm, simm5_64b:$imm5)>;
 }
 
@@ -5003,21 +5011,21 @@
   def _D : sve_int_index_rr<0b11, asm, ZPR64, GPR64>;
 
   // add(step_vector(step), dup(X)) -> index(X, step).
-  def : Pat<(add (nxv16i8 (step_vector_oneuse (i32 imm:$imm))), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))),
-            (!cast<Instruction>(NAME # "_B") GPR32:$Rn, (!cast<Instruction>("MOVi32imm") imm:$imm))>;
-  def : Pat<(add (nxv8i16 (step_vector_oneuse (i32 imm:$imm))), (nxv8i16 (AArch64dup(i32 GPR32:$Rn)))),
-            (!cast<Instruction>(NAME # "_H") GPR32:$Rn, (!cast<Instruction>("MOVi32imm") imm:$imm))>;
-  def : Pat<(add (nxv4i32 (step_vector_oneuse (i32 imm:$imm))), (nxv4i32 (AArch64dup(i32 GPR32:$Rn)))),
-            (!cast<Instruction>(NAME # "_S") GPR32:$Rn, (!cast<Instruction>("MOVi32imm") imm:$imm))>;
-  def : Pat<(add (nxv2i64 (step_vector_oneuse (i64 imm:$imm))), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))),
-            (!cast<Instruction>(NAME # "_D") GPR64:$Rn, (!cast<Instruction>("MOVi64imm") imm:$imm))>;
-  def : Pat<(add (nxv2i64 (step_vector_oneuse (i64 !cast<ImmLeaf>("i64imm_32bit"):$imm))), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))),
-            (!cast<Instruction>(NAME # "_D") GPR64:$Rn, (SUBREG_TO_REG (i64 0), (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") imm:$imm)), sub_32))>;
+  def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$imm)), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))),
+            (!cast<Instruction>(NAME # "_B") GPR32:$Rn, (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") $imm)))>;
+  def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$imm)), (nxv8i16 (AArch64dup(i32 GPR32:$Rn)))),
+            (!cast<Instruction>(NAME # "_H") GPR32:$Rn, (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") $imm)))>;
+  def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$imm)), (nxv4i32 (AArch64dup(i32 GPR32:$Rn)))),
+            (!cast<Instruction>(NAME # "_S") GPR32:$Rn, (!cast<Instruction>("MOVi32imm") $imm))>;
+  def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$imm)), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))),
+            (!cast<Instruction>(NAME # "_D") GPR64:$Rn, (!cast<Instruction>("MOVi64imm") $imm))>;
+  def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$imm)), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))),
+            (!cast<Instruction>(NAME # "_D") GPR64:$Rn, (SUBREG_TO_REG (i64 0), (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") $imm)), sub_32))>;
 
   // add(mul(step_vector(1), dup(Y)), dup(X)) -> index(X, Y).
-  def : Pat<(add (mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))),
+  def : Pat<(add (mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))),
             (!cast<Instruction>(NAME # "_B") GPR32:$Rn, GPR32:$Rm)>;
-  def : Pat<(add (mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),(nxv8i16 (AArch64dup(i32 GPR32:$Rn)))),
+  def : Pat<(add (mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),(nxv8i16 (AArch64dup(i32 GPR32:$Rn)))),
             (!cast<Instruction>(NAME # "_H") GPR32:$Rn, GPR32:$Rm)>;
   def : Pat<(add (mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),(nxv4i32 (AArch64dup(i32 GPR32:$Rn)))),
             (!cast<Instruction>(NAME # "_S") GPR32:$Rn, GPR32:$Rm)>;
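Because the matched step can now be an i8 or i16 constant while the selected INDEX and MOV instructions still take a 32-bit (or 64-bit) immediate operand, the rewritten patterns above route sub-32-bit steps through the existing trunc_imm SDNodeXForm. Assuming the usual shape of that XForm (it lives in AArch64InstrInfo.td, not in this patch), its effect is roughly:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Illustrative only: re-emit the matched step constant as an i32 target
// constant so it can be used directly as a machine-instruction immediate.
SDValue truncStepToI32(SelectionDAG &DAG, SDNode *N) {
  auto *C = cast<ConstantSDNode>(N);
  return DAG.getTargetConstant(static_cast<uint32_t>(C->getZExtValue()),
                               SDLoc(N), MVT::i32);
}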
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4070,16 +4070,15 @@
   SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
   uint64_t StepValImm = Op.getConstantOperandVal(0);
   if (StepValImm != 1) {
-    assert(Op.getOperand(0).getValueType() == XLenVT &&
-           "Unexpected step value type");
     if (isPowerOf2_64(StepValImm)) {
       SDValue StepVal =
           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
                       DAG.getConstant(Log2_64(StepValImm), DL, XLenVT));
       StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
     } else {
-      SDValue StepVal =
-          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Op.getOperand(0));
+      SDValue StepVal = lowerScalarSplat(
+          DAG.getConstant(StepValImm, DL, VT.getVectorElementType()), VL, VT,
+          DL, DAG, Subtarget);
       StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
     }
   }
diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64
 
 declare <vscale x 1 x i8> @llvm.experimental.stepvector.nxv1i8()
 
@@ -431,6 +431,43 @@
   ret <vscale x 8 x i64> %3
 }
 
+define <vscale x 8 x i64> @mul_bigimm_stepvector_nxv8i64() {
+; RV32-LABEL: mul_bigimm_stepvector_nxv8i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    addi a0, zero, 7
+; RV32-NEXT:    sw a0, 12(sp)
+; RV32-NEXT:    lui a0, 797989
+; RV32-NEXT:    addi a0, a0, -683
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v8, (a0), zero
+; RV32-NEXT:    vid.v v16
+; RV32-NEXT:    vmul.vv v8, v16, v8
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mul_bigimm_stepvector_nxv8i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; RV64-NEXT:    vid.v v8
+; RV64-NEXT:    lui a0, 1987
+; RV64-NEXT:    addiw a0, a0, -731
+; RV64-NEXT:    slli a0, a0, 12
+; RV64-NEXT:    addi a0, a0, -683
+; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 8 x i64> poison, i64 33333333333, i32 0
+  %1 = shufflevector <vscale x 8 x i64> %0, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %2 = call <vscale x 8 x i64> @llvm.experimental.stepvector.nxv8i64()
+  %3 = mul <vscale x 8 x i64> %2, %1
+  ret <vscale x 8 x i64> %3
+}
+
+
 define <vscale x 8 x i64> @shl_stepvector_nxv8i64() {
 ; CHECK-LABEL: shl_stepvector_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
@@ -449,27 +486,59 @@
 declare <vscale x 16 x i64> @llvm.experimental.stepvector.nxv16i64()
 
 define <vscale x 16 x i64> @stepvector_nxv16i64() {
-; CHECK-LABEL: stepvector_nxv16i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vadd.vx v16, v8, a0
-; CHECK-NEXT:    ret
+; RV32-LABEL: stepvector_nxv16i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw zero, 12(sp)
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vid.v v8
+; RV32-NEXT:    vadd.vv v16, v8, v16
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: stepvector_nxv16i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-NEXT:    vid.v v8
+; RV64-NEXT:    vadd.vx v16, v8, a0
+; RV64-NEXT:    ret
   %v = call <vscale x 16 x i64> @llvm.experimental.stepvector.nxv16i64()
   ret <vscale x 16 x i64> %v
 }
 
 define <vscale x 16 x i64> @add_stepvector_nxv16i64() {
-; CHECK-LABEL: add_stepvector_nxv16i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vsll.vi v8, v8, 1
-; CHECK-NEXT:    vadd.vx v16, v8, a0
-; CHECK-NEXT:    ret
+; RV32-LABEL: add_stepvector_nxv16i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw zero, 12(sp)
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vid.v v8
+; RV32-NEXT:    vsll.vi v8, v8, 1
+; RV32-NEXT:    vadd.vv v16, v8, v16
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: add_stepvector_nxv16i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-NEXT:    vid.v v8
+; RV64-NEXT:    vsll.vi v8, v8, 1
+; RV64-NEXT:    vadd.vx v16, v8, a0
+; RV64-NEXT:    ret
 entry:
   %0 = call <vscale x 16 x i64> @llvm.experimental.stepvector.nxv16i64()
   %1 = call <vscale x 16 x i64> @llvm.experimental.stepvector.nxv16i64()
@@ -478,17 +547,39 @@
 }
 
 define <vscale x 16 x i64> @mul_stepvector_nxv16i64() {
-; CHECK-LABEL: mul_stepvector_nxv16i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    addi a0, zero, 3
-; CHECK-NEXT:    vmul.vx v8, v8, a0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a1, a0, 1
-; CHECK-NEXT:    add a0, a1, a0
-; CHECK-NEXT:    vadd.vx v16, v8, a0
-; CHECK-NEXT:    ret
+; RV32-LABEL: mul_stepvector_nxv16i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    srli a1, a0, 3
+; RV32-NEXT:    addi a2, zero, 24
+; RV32-NEXT:    mulhu a1, a1, a2
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    slli a1, a0, 1
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vid.v v8
+; RV32-NEXT:    addi a0, zero, 3
+;
RV32-NEXT: vmul.vx v8, v8, a0 +; RV32-NEXT: vadd.vv v16, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: mul_stepvector_nxv16i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV64-NEXT: vid.v v8 +; RV64-NEXT: addi a0, zero, 3 +; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: vadd.vx v16, v8, a0 +; RV64-NEXT: ret entry: %0 = insertelement poison, i64 3, i32 0 %1 = shufflevector %0, poison, zeroinitializer @@ -497,16 +588,88 @@ ret %3 } +define @mul_bigimm_stepvector_nxv16i64() { +; RV32-LABEL: mul_bigimm_stepvector_nxv16i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi a0, zero, 7 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: lui a0, 797989 +; RV32-NEXT: addi a0, a0, -683 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: lui a1, 11557 +; RV32-NEXT: addi a1, a1, -683 +; RV32-NEXT: mul a1, a0, a1 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: srli a0, a0, 3 +; RV32-NEXT: addi a1, zero, 62 +; RV32-NEXT: mul a1, a0, a1 +; RV32-NEXT: lui a2, 92455 +; RV32-NEXT: addi a2, a2, -1368 +; RV32-NEXT: mulhu a0, a0, a2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vid.v v24 +; RV32-NEXT: vmul.vv v8, v24, v8 +; RV32-NEXT: vadd.vv v16, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: mul_bigimm_stepvector_nxv16i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: lui a1, 1987 +; RV64-NEXT: addiw a1, a1, -731 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -683 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vid.v v8 +; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: vadd.vx v16, v8, a0 +; RV64-NEXT: ret +entry: + %0 = insertelement poison, i64 33333333333, i32 0 + %1 = shufflevector %0, poison, zeroinitializer + %2 = call @llvm.experimental.stepvector.nxv16i64() + %3 = mul %2, %1 + ret %3 +} + define @shl_stepvector_nxv16i64() { -; CHECK-LABEL: shl_stepvector_nxv16i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vsll.vi v8, v8, 2 -; CHECK-NEXT: vadd.vx v16, v8, a0 -; CHECK-NEXT: ret +; RV32-LABEL: shl_stepvector_nxv16i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vid.v v8 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vadd.vv v16, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: shl_stepvector_nxv16i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-NEXT: vid.v v8 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vadd.vx v16, v8, a0 +; RV64-NEXT: ret entry: %0 = insertelement poison, i64 2, i32 0 %1 = shufflevector %0, poison, zeroinitializer