diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -594,15 +594,12 @@
 
   /// STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised
   /// of a linear sequence of unsigned values starting from 0 with a step of
-  /// IMM, where IMM must be a vector index constant integer value which must
-  /// fit in the vector element type.
-  /// Note that IMM may be a smaller type than the vector element type, in
-  /// which case the step is implicitly sign-extended to the vector element
-  /// type. IMM may also be a larger type than the vector element type, in
-  /// which case the step is implicitly truncated to the vector element type.
+  /// IMM, where IMM must be a TargetConstant with type equal to the vector
+  /// element type. The arithmetic is performed modulo the bitwidth of the
+  /// element.
+  ///
   /// The operation does not support returning fixed-width vectors or
-  /// non-constant operands. If the sequence value exceeds the limit allowed
-  /// for the element type then the values for those lanes are undefined.
+  /// non-constant operands.
   STEP_VECTOR,
 
   /// MULHU/MULHS - Multiply high - Multiply two integers of type iN,
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -835,7 +835,7 @@
 
   /// Returns a vector of type ResVT whose elements contain the linear sequence
   ///   <0, Step, Step * 2, Step * 3, ...>
-  SDValue getStepVector(const SDLoc &DL, EVT ResVT, SDValue Step);
+  SDValue getStepVector(const SDLoc &DL, EVT ResVT, APInt StepVal);
 
   /// Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to
   /// the shuffle node in input but with swapped operands.
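For reference, a minimal caller-side sketch of the new getStepVector contract; the element type and step value below are illustrative and not taken from the patch. The step is now passed as an APInt whose width must equal the vector element width, and getStepVector materialises the TargetConstant operand itself.

  // Before this patch: the step had to be built as a constant SDValue first.
  SDValue Step = DAG.getConstant(4, DL, MVT::i32);
  SDValue SV = DAG.getStepVector(DL, MVT::nxv4i32, Step);

  // After this patch: pass the step as an APInt of the element width;
  // the TargetConstant operand is created inside getStepVector.
  SDValue SV = DAG.getStepVector(DL, MVT::nxv4i32, APInt(32, 4));
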
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2525,8 +2525,7 @@ N1.getOpcode() == ISD::STEP_VECTOR) { const APInt &C0 = N0->getConstantOperandAPInt(0); const APInt &C1 = N1->getConstantOperandAPInt(0); - EVT SVT = N0.getOperand(0).getValueType(); - SDValue NewStep = DAG.getConstant(C0 + C1, DL, SVT); + APInt NewStep = C0 + C1; return DAG.getStepVector(DL, VT, NewStep); } @@ -2536,11 +2535,7 @@ (N1.getOpcode() == ISD::STEP_VECTOR)) { const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0); const APInt &SV1 = N1->getConstantOperandAPInt(0); - EVT SVT = N1.getOperand(0).getValueType(); - assert(N1.getOperand(0).getValueType() == - N0.getOperand(1)->getOperand(0).getValueType() && - "Different operand types of STEP_VECTOR."); - SDValue NewStep = DAG.getConstant(SV0 + SV1, DL, SVT); + APInt NewStep = SV0 + SV1; SDValue SV = DAG.getStepVector(DL, VT, NewStep); return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV); } @@ -3574,8 +3569,7 @@ // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C)) if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) { - SDValue NewStep = DAG.getConstant(-N1.getConstantOperandAPInt(0), DL, - N1.getOperand(0).getValueType()); + APInt NewStep = -N1.getConstantOperandAPInt(0); return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getStepVector(DL, VT, NewStep)); } @@ -3959,9 +3953,7 @@ if (N0.getOpcode() == ISD::STEP_VECTOR) if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) { const APInt &C0 = N0.getConstantOperandAPInt(0); - EVT SVT = N0.getOperand(0).getValueType(); - SDValue NewStep = DAG.getConstant( - C0 * MulVal.sextOrTrunc(SVT.getSizeInBits()), SDLoc(N), SVT); + APInt NewStep = C0 * MulVal; return DAG.getStepVector(SDLoc(N), VT, NewStep); } @@ -8474,10 +8466,10 @@ if (N0.getOpcode() == ISD::STEP_VECTOR) if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) { const APInt &C0 = N0.getConstantOperandAPInt(0); - EVT SVT = N0.getOperand(0).getValueType(); - SDValue NewStep = DAG.getConstant( - C0 << ShlVal.sextOrTrunc(SVT.getSizeInBits()), SDLoc(N), SVT); - return DAG.getStepVector(SDLoc(N), VT, NewStep); + if (ShlVal.ult(C0.getBitWidth())) { + APInt NewStep = C0 << ShlVal; + return DAG.getStepVector(SDLoc(N), VT, NewStep); + } } return SDValue(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2247,6 +2247,10 @@ case ISD::FSHR: ExpandIntRes_FunnelShift(N, Lo, Hi); break; + + case ISD::VSCALE: + ExpandIntRes_VSCALE(N, Lo, Hi); + break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -4198,6 +4202,21 @@ SplitInteger(Res, Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT VT = N->getValueType(0); + EVT HalfVT = + EVT::getIntegerVT(*DAG.getContext(), N->getValueSizeInBits(0) / 2); + SDLoc dl(N); + + // We assume VSCALE(1) fits into a legal integer. 
+  APInt One(HalfVT.getSizeInBits(), 1);
+  SDValue VScaleBase = DAG.getVScale(dl, HalfVT, One);
+  VScaleBase = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, VScaleBase);
+  SDValue Res = DAG.getNode(ISD::MUL, dl, VT, VScaleBase, N->getOperand(0));
+  SplitInteger(Res, Lo, Hi);
+}
+
 //===----------------------------------------------------------------------===//
 // Integer Operand Expansion
 //===----------------------------------------------------------------------===//
@@ -4865,11 +4884,9 @@
   EVT OutVT = N->getValueType(0);
   EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
   assert(NOutVT.isVector() && "Type must be promoted to a vector type");
-  EVT NOutElemVT = TLI.getTypeToTransformTo(*DAG.getContext(),
-                                            NOutVT.getVectorElementType());
   APInt StepVal = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
-  SDValue Step = DAG.getConstant(StepVal.getSExtValue(), dl, NOutElemVT);
-  return DAG.getStepVector(dl, NOutVT, Step);
+  return DAG.getStepVector(dl, NOutVT,
+                           StepVal.sext(NOutVT.getScalarSizeInBits()));
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -465,6 +465,8 @@
   void ExpandIntRes_Rotate            (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_FunnelShift       (SDNode *N, SDValue &Lo, SDValue &Hi);
 
+  void ExpandIntRes_VSCALE            (SDNode *N, SDValue &Lo, SDValue &Hi);
+
   void ExpandShiftByConstant(SDNode *N, const APInt &Amt,
                              SDValue &Lo, SDValue &Hi);
   bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1747,15 +1747,17 @@
   return SDValue(CondCodeNodes[Cond], 0);
 }
 
-SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT, SDValue Step) {
+SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT, APInt StepVal) {
+  assert(ResVT.getScalarSizeInBits() == StepVal.getBitWidth());
   if (ResVT.isScalableVector())
-    return getNode(ISD::STEP_VECTOR, DL, ResVT, Step);
+    return getNode(
+        ISD::STEP_VECTOR, DL, ResVT,
+        getTargetConstant(StepVal, DL, ResVT.getVectorElementType()));
 
-  EVT OpVT = Step.getValueType();
-  APInt StepVal = cast<ConstantSDNode>(Step)->getAPIntValue();
   SmallVector<SDValue, 16> OpsStepConstants;
   for (uint64_t i = 0; i < ResVT.getVectorNumElements(); i++)
-    OpsStepConstants.push_back(getConstant(StepVal * i, DL, OpVT));
+    OpsStepConstants.push_back(
+        getConstant(StepVal * i, DL, ResVT.getVectorElementType()));
   return getBuildVector(ResVT, DL, OpsStepConstants);
 }
 
@@ -4778,14 +4780,9 @@
   case ISD::STEP_VECTOR:
     assert(VT.isScalableVector() &&
            "STEP_VECTOR can only be used with scalable types");
-    assert(VT.getScalarSizeInBits() >= 8 &&
-           "STEP_VECTOR can only be used with vectors of integers that are at "
-           "least 8 bits wide");
-    assert(isa<ConstantSDNode>(Operand) &&
-           cast<ConstantSDNode>(Operand)->getAPIntValue().isSignedIntN(
-               VT.getScalarSizeInBits()) &&
-           "Expected STEP_VECTOR integer constant to fit in "
-           "the vector element type");
+    assert(OpOpcode == ISD::TargetConstant &&
+           VT.getVectorElementType() == Operand.getValueType() &&
+           "Unexpected step operand");
     break;
   case ISD::FREEZE:
     assert(VT == Operand.getValueType() && "Unexpected VT!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -11074,9 +11074,7 @@
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   auto DL = getCurSDLoc();
   EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
-  EVT OpVT =
-      TLI.getTypeToTransformTo(*DAG.getContext(), ResultVT.getScalarType());
-  SDValue Step = DAG.getConstant(1, DL, OpVT);
+  APInt Step(ResultVT.getScalarSizeInBits(), 1);
   setValue(&I, DAG.getStepVector(DL, ResultVT, Step));
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9509,7 +9509,7 @@
   SDValue SplatOne = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, One);
 
   // create the vector 0,1,0,1,...
-  SDValue SV = DAG.getNode(ISD::STEP_VECTOR, DL, MVT::nxv2i64, One);
+  SDValue SV = DAG.getStepVector(DL, MVT::nxv2i64, APInt(64, 1));
   SV = DAG.getNode(ISD::AND, DL, MVT::nxv2i64, SV, SplatOne);
 
   // create the vector idx64,idx64+1,idx64,idx64+1,...
@@ -14004,9 +14004,8 @@
     ScalarTy = MVT::i32;
 
   // Lower index_vector(base, step) to mul(step step_vector(1)) + splat(base).
-  SDValue One = DAG.getConstant(1, DL, ScalarTy);
-  SDValue StepVector =
-      DAG.getNode(ISD::STEP_VECTOR, DL, N->getValueType(0), One);
+  APInt One(N->getValueType(0).getScalarSizeInBits(), 1);
+  SDValue StepVector = DAG.getStepVector(DL, N->getValueType(0), One);
   SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2);
   SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step);
   SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -364,6 +364,14 @@
   let PrintMethod = "printSImm<16>";
 }
 
+def simm5_8b_tgt : TImmLeaf<i8, [{ return (int8_t)Imm >= -16 && (int8_t)Imm < 16; }]>;
+def simm5_16b_tgt : TImmLeaf<i16, [{ return (int16_t)Imm >= -16 && (int16_t)Imm < 16; }]>;
+def simm5_32b_tgt : TImmLeaf<i32, [{ return (int32_t)Imm >= -16 && (int32_t)Imm < 16; }]>;
+def simm5_64b_tgt : TImmLeaf<i64, [{ return (int64_t)Imm >= -16 && (int64_t)Imm < 16; }]>;
+def i64imm_32bit_tgt : TImmLeaf<i64, [{
+  return isInt<32>(Imm);
+}]>;
+
 // simm7sN predicate - True if the immediate is a multiple of N in the range
 // [-64 * N, 63 * N].
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1377,10 +1377,112 @@
 
   defm INCP_ZP : sve_int_count_v<0b10000, "incp">;
   defm DECP_ZP : sve_int_count_v<0b10100, "decp">;
 
-  defm INDEX_RR : sve_int_index_rr<"index", step_vector, step_vector_oneuse, AArch64mul_p_oneuse>;
-  defm INDEX_IR : sve_int_index_ir<"index", step_vector, step_vector_oneuse, AArch64mul_p, AArch64mul_p_oneuse>;
-  defm INDEX_RI : sve_int_index_ri<"index", step_vector, step_vector_oneuse>;
-  defm INDEX_II : sve_int_index_ii<"index", step_vector, step_vector_oneuse>;
+  defm INDEX_RR : sve_int_index_rr<"index">;
+
+  // add(step_vector(step), dup(X)) -> index(X, step).
+ def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$Rm)), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))), + (INDEX_RR_B GPR32:$Rn, (MOVi32imm (trunc_imm $Rm)))>; + def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$Rm)), (nxv8i16 (AArch64dup(i32 GPR32:$Rn)))), + (INDEX_RR_H GPR32:$Rn, (MOVi32imm (trunc_imm $Rm)))>; + def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$Rm)), (nxv4i32 (AArch64dup(i32 GPR32:$Rn)))), + (INDEX_RR_S GPR32:$Rn, (MOVi32imm $Rm))>; + def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$Rm)), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))), + (INDEX_RR_D GPR64:$Rn, (MOVi64imm $Rm))>; + def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$Rm)), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))), + (INDEX_RR_D GPR64:$Rn, (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$Rm)), sub_32))>; + + // add(mul(step_vector(1), dup(Y), dup(X)) -> index(X, Y). + def : Pat<(add (AArch64mul_p_oneuse (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))), + (INDEX_RR_B GPR32:$Rn, GPR32:$Rm)>; + def : Pat<(add (AArch64mul_p_oneuse (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),(nxv8i16 (AArch64dup(i32 GPR32:$Rn)))), + (INDEX_RR_H GPR32:$Rn, GPR32:$Rm)>; + def : Pat<(add (AArch64mul_p_oneuse (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),(nxv4i32 (AArch64dup(i32 GPR32:$Rn)))), + (INDEX_RR_S GPR32:$Rn, GPR32:$Rm)>; + def : Pat<(add (AArch64mul_p_oneuse (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),(nxv2i64 (AArch64dup(i64 GPR64:$Rn)))), + (INDEX_RR_D GPR64:$Rn, GPR64:$Rm)>; + + + defm INDEX_IR : sve_int_index_ir<"index">; + + // step_vector(step) -> index(0, step) + def : Pat<(nxv16i8 (step_vector i8:$Rm)), + (INDEX_IR_B (i32 0), (MOVi32imm (trunc_imm $Rm)))>; + def : Pat<(nxv8i16 (step_vector i16:$Rm)), + (INDEX_IR_H (i32 0), (MOVi32imm (trunc_imm $Rm)))>; + def : Pat<(nxv4i32 (step_vector i32:$Rm)), + (INDEX_IR_S (i32 0), (MOVi32imm GPR32:$Rm))>; + def : Pat<(nxv2i64 (step_vector i64:$Rm)), + (INDEX_IR_D (i64 0), (MOVi64imm GPR64:$Rm))>; + def : Pat<(nxv2i64 (step_vector i64imm_32bit_tgt:$Rm)), + (INDEX_IR_D (i64 0), (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$Rm)), sub_32))>; + + // add(step_vector(step), dup(X)) -> index(X, step). + def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$Rm)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), + (INDEX_IR_B simm5_8b:$imm5, (MOVi32imm (trunc_imm $Rm)))>; + def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$Rm)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), + (INDEX_IR_H simm5_16b:$imm5, (MOVi32imm (trunc_imm $Rm)))>; + def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$Rmm)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), + (INDEX_IR_S simm5_32b:$imm5, (MOVi32imm GPR32:$Rmm))>; + def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$Rm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), + (INDEX_IR_D simm5_64b:$imm5, (MOVi64imm GPR64:$Rm))>; + def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$Rm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), + (INDEX_IR_D simm5_64b:$imm5, (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$Rm)), sub_32))>; + + // mul(step_vector(1), dup(Y)) -> index(0, Y). 
+ def : Pat<(AArch64mul_p (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), + (INDEX_IR_B (i32 0), GPR32:$Rm)>; + def : Pat<(AArch64mul_p (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), + (INDEX_IR_H (i32 0), GPR32:$Rm)>; + def : Pat<(AArch64mul_p (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), + (INDEX_IR_S (i32 0), GPR32:$Rm)>; + def : Pat<(AArch64mul_p (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), + (INDEX_IR_D (i64 0), GPR64:$Rm)>; + + // add(mul(step_vector(1), dup(Y), dup(X)) -> index(X, Y). + def : Pat<(add (AArch64mul_p_oneuse (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), + (INDEX_IR_B simm5_8b:$imm5, GPR32:$Rm)>; + def : Pat<(add (AArch64mul_p_oneuse (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), + (INDEX_IR_H simm5_16b:$imm5, GPR32:$Rm)>; + def : Pat<(add (AArch64mul_p_oneuse (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), + (INDEX_IR_S simm5_32b:$imm5, GPR32:$Rm)>; + def : Pat<(add (AArch64mul_p_oneuse (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), + (INDEX_IR_D simm5_64b:$imm5, GPR64:$Rm)>; + + + defm INDEX_RI : sve_int_index_ri<"index">; + + // add(step_vector(step), dup(X)) -> index(X, step). + def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5)), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), + (INDEX_RI_B GPR32:$Rm, (trunc_imm $imm5))>; + def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5)), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), + (INDEX_RI_H GPR32:$Rm, (trunc_imm $imm5))>; + def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5)), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), + (INDEX_RI_S GPR32:$Rm, simm5_32b:$imm5)>; + def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5)), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), + (INDEX_RI_D GPR64:$Rm, simm5_64b:$imm5)>; + + + defm INDEX_II : sve_int_index_ii<"index">; + + // step_vector(step) -> index(0, step). + def : Pat<(nxv16i8 (step_vector simm5_8b_tgt:$imm5b)), + (INDEX_II_B (i32 0), (trunc_imm $imm5b))>; + def : Pat<(nxv8i16 (step_vector simm5_16b_tgt:$imm5b)), + (INDEX_II_H (i32 0), (trunc_imm $imm5b))>; + def : Pat<(nxv4i32 (step_vector simm5_32b_tgt:$imm5b)), + (INDEX_II_S (i32 0), simm5_32b:$imm5b)>; + def : Pat<(nxv2i64 (step_vector simm5_64b_tgt:$imm5b)), + (INDEX_II_D (i64 0), simm5_64b:$imm5b)>; + + // add(step_vector(step), dup(X)) -> index(X, step). 
+  def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5b)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
+            (INDEX_II_B simm5_8b:$imm5, (trunc_imm $imm5b))>;
+  def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5b)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
+            (INDEX_II_H simm5_16b:$imm5, (trunc_imm $imm5b))>;
+  def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5b)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
+            (INDEX_II_S simm5_32b:$imm5, simm5_32b:$imm5b)>;
+  def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5b)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
+            (INDEX_II_D simm5_64b:$imm5, simm5_64b:$imm5b)>;
 
   // Unpredicated shifts
   defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_p>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -4852,30 +4852,11 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sve_int_index_ii<string asm, SDPatternOperator op, SDPatternOperator oneuseop> {
+multiclass sve_int_index_ii<string asm> {
   def _B : sve_int_index_ii<0b00, asm, ZPR8, simm5_8b>;
   def _H : sve_int_index_ii<0b01, asm, ZPR16, simm5_16b>;
   def _S : sve_int_index_ii<0b10, asm, ZPR32, simm5_32b>;
   def _D : sve_int_index_ii<0b11, asm, ZPR64, simm5_64b>;
-
-  def : Pat<(nxv16i8 (op simm5_8b:$imm5b)),
-            (!cast<Instruction>(NAME # "_B") (i32 0), simm5_8b:$imm5b)>;
-  def : Pat<(nxv8i16 (op simm5_16b:$imm5b)),
-            (!cast<Instruction>(NAME # "_H") (i32 0), simm5_16b:$imm5b)>;
-  def : Pat<(nxv4i32 (op simm5_32b:$imm5b)),
-            (!cast<Instruction>(NAME # "_S") (i32 0), simm5_32b:$imm5b)>;
-  def : Pat<(nxv2i64 (op simm5_64b:$imm5b)),
-            (!cast<Instruction>(NAME # "_D") (i64 0), simm5_64b:$imm5b)>;
-
-  // add(step_vector(step), dup(X)) -> index(X, step).
-  def : Pat<(add (nxv16i8 (oneuseop simm5_8b:$imm5b)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
-            (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, simm5_8b:$imm5b)>;
-  def : Pat<(add (nxv8i16 (oneuseop simm5_16b:$imm5b)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
-            (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, simm5_16b:$imm5b)>;
-  def : Pat<(add (nxv4i32 (oneuseop simm5_32b:$imm5b)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
-            (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, simm5_32b:$imm5b)>;
-  def : Pat<(add (nxv2i64 (oneuseop simm5_64b:$imm5b)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
-            (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, simm5_64b:$imm5b)>;
 }
 
 class sve_int_index_ir<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -4895,50 +4876,11 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sve_int_index_ir<string asm, SDPatternOperator op, SDPatternOperator oneuseop, SDPatternOperator mulop, SDPatternOperator muloneuseop> {
+multiclass sve_int_index_ir<string asm> {
   def _B : sve_int_index_ir<0b00, asm, ZPR8, GPR32, simm5_8b>;
   def _H : sve_int_index_ir<0b01, asm, ZPR16, GPR32, simm5_16b>;
   def _S : sve_int_index_ir<0b10, asm, ZPR32, GPR32, simm5_32b>;
   def _D : sve_int_index_ir<0b11, asm, ZPR64, GPR64, simm5_64b>;
-
-  def : Pat<(nxv16i8 (op GPR32:$Rm)),
-            (!cast<Instruction>(NAME # "_B") (i32 0), GPR32:$Rm)>;
-  def : Pat<(nxv8i16 (op GPR32:$Rm)),
-            (!cast<Instruction>(NAME # "_H") (i32 0), GPR32:$Rm)>;
-  def : Pat<(nxv4i32 (op GPR32:$Rm)),
-            (!cast<Instruction>(NAME # "_S") (i32 0), GPR32:$Rm)>;
-  def : Pat<(nxv2i64 (op GPR64:$Rm)),
-            (!cast<Instruction>(NAME # "_D") (i64 0), GPR64:$Rm)>;
-
-  // add(step_vector(step), dup(X)) -> index(X, step).
-  def : Pat<(add (nxv16i8 (oneuseop GPR32:$Rm)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
-            (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, GPR32:$Rm)>;
-  def : Pat<(add (nxv8i16 (oneuseop GPR32:$Rm)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
-            (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, GPR32:$Rm)>;
-  def : Pat<(add (nxv4i32 (oneuseop GPR32:$Rm)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
-            (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>;
-  def : Pat<(add (nxv2i64 (oneuseop GPR64:$Rm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
-            (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>;
-
-  // mul(step_vector(1), dup(Y)) -> index(0, Y).
-  def : Pat<(mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (oneuseop (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))),
-            (!cast<Instruction>(NAME # "_B") (i32 0), GPR32:$Rm)>;
-  def : Pat<(mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (oneuseop (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),
-            (!cast<Instruction>(NAME # "_H") (i32 0), GPR32:$Rm)>;
-  def : Pat<(mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (oneuseop (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),
-            (!cast<Instruction>(NAME # "_S") (i32 0), GPR32:$Rm)>;
-  def : Pat<(mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (oneuseop (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),
-            (!cast<Instruction>(NAME # "_D") (i64 0), GPR64:$Rm)>;
-
-  // add(mul(step_vector(1), dup(Y), dup(X)) -> index(X, Y).
-  def : Pat<(add (muloneuseop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (oneuseop (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
-            (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, GPR32:$Rm)>;
-  def : Pat<(add (muloneuseop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (oneuseop (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
-            (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, GPR32:$Rm)>;
-  def : Pat<(add (muloneuseop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (oneuseop (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
-            (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>;
-  def : Pat<(add (muloneuseop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (oneuseop (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
-            (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>;
 }
 
 class sve_int_index_ri<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -4958,21 +4900,11 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sve_int_index_ri<string asm, SDPatternOperator op, SDPatternOperator oneuseop> {
+multiclass sve_int_index_ri<string asm> {
   def _B : sve_int_index_ri<0b00, asm, ZPR8, GPR32, simm5_8b>;
   def _H : sve_int_index_ri<0b01, asm, ZPR16, GPR32, simm5_16b>;
   def _S : sve_int_index_ri<0b10, asm, ZPR32, GPR32, simm5_32b>;
   def _D : sve_int_index_ri<0b11, asm, ZPR64, GPR64, simm5_64b>;
-
-  // add(step_vector(step), dup(X)) -> index(X, step).
-  def : Pat<(add (nxv16i8 (oneuseop simm5_8b:$imm5)), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))),
-            (!cast<Instruction>(NAME # "_B") GPR32:$Rm, simm5_8b:$imm5)>;
-  def : Pat<(add (nxv8i16 (oneuseop simm5_16b:$imm5)), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),
-            (!cast<Instruction>(NAME # "_H") GPR32:$Rm, simm5_16b:$imm5)>;
-  def : Pat<(add (nxv4i32 (oneuseop simm5_32b:$imm5)), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),
-            (!cast<Instruction>(NAME # "_S") GPR32:$Rm, simm5_32b:$imm5)>;
-  def : Pat<(add (nxv2i64 (oneuseop simm5_64b:$imm5)), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),
-            (!cast<Instruction>(NAME # "_D") GPR64:$Rm, simm5_64b:$imm5)>;
 }
 
 class sve_int_index_rr<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -4992,31 +4924,11 @@
   let Inst{4-0} = Zd;
 }
 
-multiclass sve_int_index_rr<string asm, SDPatternOperator op, SDPatternOperator oneuseop, SDPatternOperator mulop> {
+multiclass sve_int_index_rr<string asm> {
   def _B : sve_int_index_rr<0b00, asm, ZPR8, GPR32>;
   def _H : sve_int_index_rr<0b01, asm, ZPR16, GPR32>;
   def _S : sve_int_index_rr<0b10, asm, ZPR32, GPR32>;
   def _D : sve_int_index_rr<0b11, asm, ZPR64, GPR64>;
-
-  // add(step_vector(step), dup(X)) -> index(X, step).
-  def : Pat<(add (nxv16i8 (oneuseop GPR32:$Rm)), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))),
-            (!cast<Instruction>(NAME # "_B") GPR32:$Rn, GPR32:$Rm)>;
-  def : Pat<(add (nxv8i16 (oneuseop GPR32:$Rm)), (nxv8i16 (AArch64dup(i32 GPR32:$Rn)))),
-            (!cast<Instruction>(NAME # "_H") GPR32:$Rn, GPR32:$Rm)>;
-  def : Pat<(add (nxv4i32 (oneuseop GPR32:$Rm)), (nxv4i32 (AArch64dup(i32 GPR32:$Rn)))),
-            (!cast<Instruction>(NAME # "_S") GPR32:$Rn, GPR32:$Rm)>;
-  def : Pat<(add (nxv2i64 (oneuseop GPR64:$Rm)), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))),
-            (!cast<Instruction>(NAME # "_D") GPR64:$Rn, GPR64:$Rm)>;
-
-  // add(mul(step_vector(1), dup(Y), dup(X)) -> index(X, Y).
-  def : Pat<(add (mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (oneuseop (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))),
-            (!cast<Instruction>(NAME # "_B") GPR32:$Rn, GPR32:$Rm)>;
-  def : Pat<(add (mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (oneuseop (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),(nxv8i16 (AArch64dup(i32 GPR32:$Rn)))),
-            (!cast<Instruction>(NAME # "_H") GPR32:$Rn, GPR32:$Rm)>;
-  def : Pat<(add (mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (oneuseop (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),(nxv4i32 (AArch64dup(i32 GPR32:$Rn)))),
-            (!cast<Instruction>(NAME # "_S") GPR32:$Rn, GPR32:$Rm)>;
-  def : Pat<(add (mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (oneuseop (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),(nxv2i64 (AArch64dup(i64 GPR64:$Rn)))),
-            (!cast<Instruction>(NAME # "_D") GPR64:$Rn, GPR64:$Rm)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4049,16 +4049,15 @@
   SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
   uint64_t StepValImm = Op.getConstantOperandVal(0);
   if (StepValImm != 1) {
-    assert(Op.getOperand(0).getValueType() == XLenVT &&
-           "Unexpected step value type");
     if (isPowerOf2_64(StepValImm)) {
       SDValue StepVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
                                     DAG.getConstant(Log2_64(StepValImm), DL, XLenVT));
       StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
     } else {
-      SDValue StepVal =
-          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Op.getOperand(0));
+      SDValue StepVal = lowerScalarSplat(
+          DAG.getConstant(StepValImm, DL, VT.getVectorElementType()), VL, VT,
+          DL, DAG, Subtarget);
       StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
     }
   }
diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll --- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64 declare @llvm.experimental.stepvector.nxv1i8() @@ -431,6 +431,43 @@ ret %3 } +define @mul_bigimm_stepvector_nxv8i64() { +; RV32-LABEL: mul_bigimm_stepvector_nxv8i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi a0, zero, 7 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: lui a0, 797989 +; RV32-NEXT: addi a0, a0, -683 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: vid.v v16 +; RV32-NEXT: vmul.vv v8, v16, v8 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: mul_bigimm_stepvector_nxv8i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV64-NEXT: vid.v v8 +; RV64-NEXT: lui a0, 1987 +; RV64-NEXT: addiw a0, a0, -731 +; RV64-NEXT: slli a0, a0, 12 +; RV64-NEXT: addi a0, a0, -683 +; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: ret +entry: + %0 = insertelement poison, i64 33333333333, i32 0 + %1 = shufflevector %0, poison, zeroinitializer + %2 = call @llvm.experimental.stepvector.nxv8i64() + %3 = mul %2, %1 + ret %3 +} + + define @shl_stepvector_nxv8i64() { ; CHECK-LABEL: shl_stepvector_nxv8i64: ; CHECK: # %bb.0: # %entry @@ -449,27 +486,63 @@ declare @llvm.experimental.stepvector.nxv16i64() define @stepvector_nxv16i64() { -; CHECK-LABEL: stepvector_nxv16i64: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vadd.vx v16, v8, a0 -; CHECK-NEXT: ret +; RV32-LABEL: stepvector_nxv16i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: srli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vid.v v8 +; RV32-NEXT: vadd.vv v16, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: stepvector_nxv16i64: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-NEXT: vid.v v8 +; RV64-NEXT: vadd.vx v16, v8, a0 +; RV64-NEXT: ret %v = call @llvm.experimental.stepvector.nxv16i64() ret %v } define @add_stepvector_nxv16i64() { -; CHECK-LABEL: add_stepvector_nxv16i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vsll.vi v8, v8, 1 -; CHECK-NEXT: vadd.vx v16, v8, a0 -; CHECK-NEXT: ret +; RV32-LABEL: add_stepvector_nxv16i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: srli a1, a0, 31 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: 
srli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vid.v v8 +; RV32-NEXT: vsll.vi v8, v8, 1 +; RV32-NEXT: vadd.vv v16, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: add_stepvector_nxv16i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-NEXT: vid.v v8 +; RV64-NEXT: vsll.vi v8, v8, 1 +; RV64-NEXT: vadd.vx v16, v8, a0 +; RV64-NEXT: ret entry: %0 = call @llvm.experimental.stepvector.nxv16i64() %1 = call @llvm.experimental.stepvector.nxv16i64() @@ -478,18 +551,39 @@ } define @mul_stepvector_nxv16i64() { -; CHECK-LABEL: mul_stepvector_nxv16i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: addi a0, zero, 3 -; CHECK-NEXT: vmul.vx v8, v8, a0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a1, zero, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: vadd.vx v16, v8, a0 -; CHECK-NEXT: ret +; RV32-LABEL: mul_stepvector_nxv16i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: srli a0, a0, 3 +; RV32-NEXT: addi a1, zero, 24 +; RV32-NEXT: mulhu a2, a0, a1 +; RV32-NEXT: sw a2, 12(sp) +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vid.v v8 +; RV32-NEXT: addi a0, zero, 3 +; RV32-NEXT: vmul.vx v8, v8, a0 +; RV32-NEXT: vadd.vv v16, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: mul_stepvector_nxv16i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV64-NEXT: vid.v v8 +; RV64-NEXT: addi a0, zero, 3 +; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: srli a0, a0, 3 +; RV64-NEXT: addi a1, zero, 24 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: vadd.vx v16, v8, a0 +; RV64-NEXT: ret entry: %0 = insertelement poison, i64 3, i32 0 %1 = shufflevector %0, poison, zeroinitializer @@ -498,16 +592,93 @@ ret %3 } +define @mul_bigimm_stepvector_nxv16i64() { +; RV32-LABEL: mul_bigimm_stepvector_nxv16i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi a0, zero, 7 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: lui a0, 797989 +; RV32-NEXT: addi a0, a0, -683 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: srli a0, a0, 3 +; RV32-NEXT: lui a1, 92455 +; RV32-NEXT: addi a1, a1, -1368 +; RV32-NEXT: mul a2, a0, a1 +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: addi a2, zero, 62 +; RV32-NEXT: mul a2, a0, a2 +; RV32-NEXT: mulhu a0, a0, a1 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vid.v v24 +; RV32-NEXT: vmul.vv v8, v24, v8 +; RV32-NEXT: vadd.vv v16, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: mul_bigimm_stepvector_nxv16i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: srli a0, a0, 3 +; RV64-NEXT: lui a1, 15895 +; RV64-NEXT: addiw a1, a1, -1753 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -1368 +; RV64-NEXT: mul a0, 
a0, a1 +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-NEXT: vid.v v8 +; RV64-NEXT: lui a1, 1987 +; RV64-NEXT: addiw a1, a1, -731 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -683 +; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: vadd.vx v16, v8, a0 +; RV64-NEXT: ret +entry: + %0 = insertelement poison, i64 33333333333, i32 0 + %1 = shufflevector %0, poison, zeroinitializer + %2 = call @llvm.experimental.stepvector.nxv16i64() + %3 = mul %2, %1 + ret %3 +} + define @shl_stepvector_nxv16i64() { -; CHECK-LABEL: shl_stepvector_nxv16i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vsll.vi v8, v8, 2 -; CHECK-NEXT: vadd.vx v16, v8, a0 -; CHECK-NEXT: ret +; RV32-LABEL: shl_stepvector_nxv16i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: srli a1, a0, 30 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: srli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vid.v v8 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vadd.vv v16, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: shl_stepvector_nxv16i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-NEXT: vid.v v8 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vadd.vx v16, v8, a0 +; RV64-NEXT: ret entry: %0 = insertelement poison, i64 2, i32 0 %1 = shufflevector %0, poison, zeroinitializer