Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -16596,6 +16596,36 @@
 the source/result vector. The ``imm`` is a signed integer constant in the range
 ``-VL <= imm < VL``. For values outside of this range the result is poison.
+
+'``llvm.experimental.stepvector``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This is an overloaded intrinsic. You can use ``llvm.experimental.stepvector``
+to generate a vector whose lane values comprise the linear sequence
+<0, 1, 2, ...>. It is primarily intended for scalable vectors.
+
+::
+
+      declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+      declare <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+
+The '``llvm.experimental.stepvector``' intrinsics are used to create vectors
+of integers whose elements contain a linear sequence of values starting from 0
+with a step of 1. This experimental intrinsic can only be used for vectors
+with integer elements that are at least 8 bits in size. If the sequence value
+exceeds the allowed limit for the element type then the result for that lane is
+undefined.
+
+These intrinsics work for both fixed and scalable vectors. While this intrinsic
+is marked as experimental, the recommended way to express this operation for
+fixed-width vectors is still to generate a constant vector instead.
+
+
+Arguments:
+""""""""""
+
+None.
+
+
 Matrix Intrinsics
 -----------------
Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1249,6 +1249,12 @@
     return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
                                            VarMask, Alignment, CostKind, I);
   }
+  case Intrinsic::experimental_stepvector: {
+    if (isa<ScalableVectorType>(RetTy))
+      return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+    // The cost of materialising a constant integer vector.
+    return TargetTransformInfo::TCC_Basic;
+  }
   case Intrinsic::experimental_vector_extract: {
     // FIXME: Handle case where a scalable vector is extracted from a scalable
     // vector
Index: llvm/include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -592,6 +592,14 @@
   /// scalars should have the same type.
   SPLAT_VECTOR_PARTS,
 
+  /// STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised
+  /// of a linear sequence of unsigned values starting from 0 with a step of
+  /// IMM, where IMM must be a constant positive integer value. The operation
+  /// does not support returning fixed-width vectors or non-constant operands.
+  /// If the sequence value exceeds the limit allowed for the element type then
+  /// the values for those lanes are undefined.
+  STEP_VECTOR,
+
   /// MULHU/MULHS - Multiply high - Multiply two integers of type iN,
   /// producing an unsigned/signed value of type i[2*N], then return the top
   /// part.
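For reference, a minimal IR sketch of the documented behaviour for a fixed-width result, where the call is simply equivalent to a constant step vector (illustrative only, not part of the diff; the function name @fixed_width_example is hypothetical):

; Illustrative only: %step is equivalent to the constant <i32 0, i32 1, i32 2, i32 3>,
; which remains the recommended form for fixed-width vectors.
define <4 x i32> @fixed_width_example() {
  %step = call <4 x i32> @llvm.experimental.stepvector.v4i32()
  ret <4 x i32> %step
}

declare <4 x i32> @llvm.experimental.stepvector.v4i32()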
Index: llvm/include/llvm/CodeGen/SelectionDAG.h
===================================================================
--- llvm/include/llvm/CodeGen/SelectionDAG.h
+++ llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -833,6 +833,10 @@
     return getNode(ISD::SPLAT_VECTOR, DL, VT, Op);
   }
 
+  /// Returns a vector of type ResVT whose elements contain the linear sequence
+  /// <0, Step, Step * 2, Step * 3, ...>
+  SDValue getStepVector(const SDLoc &DL, EVT ResVT, SDValue Step);
+
   /// Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to
   /// the shuffle node in input but with swapped operands.
   ///
Index: llvm/include/llvm/IR/IRBuilder.h
===================================================================
--- llvm/include/llvm/IR/IRBuilder.h
+++ llvm/include/llvm/IR/IRBuilder.h
@@ -854,6 +854,9 @@
   /// will be the same type as that of \p Scaling.
   Value *CreateVScale(Constant *Scaling, const Twine &Name = "");
 
+  /// Creates a vector of type \p DstType with the linear sequence <0, 1, ...>
+  Value *CreateStepVector(Type *DstType, const Twine &Name = "");
+
   /// Create a call to intrinsic \p ID with 1 operand which is mangled on its
   /// type.
   CallInst *CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V,
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1336,6 +1336,9 @@
 def int_ptrmask: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_anyint_ty],
                                        [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
 
+def int_experimental_stepvector : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                                                        [], [IntrNoMem]>;
+
 //===---------------- Vector Predication Intrinsics --------------===//
 
 // Speculatable Binary operators
Index: llvm/include/llvm/Target/TargetSelectionDAG.td
===================================================================
--- llvm/include/llvm/Target/TargetSelectionDAG.td
+++ llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -661,6 +661,8 @@
 def vector_splice : SDNode<"ISD::VECTOR_SPLICE", SDTVecSlice, []>;
 def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, -1, []>, []>;
 def splat_vector : SDNode<"ISD::SPLAT_VECTOR", SDTypeProfile<1, 1, []>, []>;
+def step_vector : SDNode<"ISD::STEP_VECTOR", SDTypeProfile<1, 1,
+                         [SDTCisVec<0>, SDTCisInt<1>]>, []>;
 def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>,
                               []>;
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -110,6 +110,7 @@
                          Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
   case ISD::SPLAT_VECTOR:
                          Res = PromoteIntRes_SPLAT_VECTOR(N); break;
+  case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break;
   case ISD::CONCAT_VECTORS:
                          Res = PromoteIntRes_CONCAT_VECTORS(N); break;
 
@@ -4782,6 +4783,18 @@
   return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op);
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_STEP_VECTOR(SDNode *N) {
+  SDLoc dl(N);
+  EVT OutVT = N->getValueType(0);
+  EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+  assert(NOutVT.isVector() && "Type must be promoted to a vector type");
+  EVT NOutElemVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+                                            NOutVT.getVectorElementType());
+  APInt StepVal = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
+  SDValue Step = DAG.getConstant(StepVal.getZExtValue(), dl, NOutElemVT);
+  return DAG.getStepVector(dl, NOutVT, Step);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
   SDLoc dl(N);
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -304,6 +304,7 @@
   SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
   SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
   SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N);
+  SDValue PromoteIntRes_STEP_VECTOR(SDNode *N);
   SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N);
   SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
   SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
@@ -836,6 +837,7 @@
   void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -928,6 +928,9 @@
   case ISD::SCALAR_TO_VECTOR:
     SplitVecRes_ScalarOp(N, Lo, Hi);
     break;
+  case ISD::STEP_VECTOR:
+    SplitVecRes_STEP_VECTOR(N, Lo, Hi);
+    break;
   case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
   case ISD::LOAD:
     SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
@@ -1639,6 +1642,30 @@
   Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
 }
 
+void DAGTypeLegalizer::SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo,
+                                               SDValue &Hi) {
+  EVT LoVT, HiVT;
+  SDLoc dl(N);
+  assert(N->getValueType(0).isScalableVector() &&
+         "Only scalable vectors are supported for STEP_VECTOR");
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  SDValue Step = N->getOperand(0);
+
+  Lo = DAG.getNode(ISD::STEP_VECTOR, dl, LoVT, Step);
+
+  // Hi = Lo + (EltCnt * Step)
+  EVT EltVT = Step.getValueType();
+  SDValue StartOfHi =
+      DAG.getVScale(dl, EltVT,
+                    cast<ConstantSDNode>(Step)->getAPIntValue() *
+                        LoVT.getVectorMinNumElements());
+  StartOfHi = DAG.getZExtOrTrunc(StartOfHi, dl, HiVT.getVectorElementType());
+  StartOfHi = DAG.getNode(ISD::SPLAT_VECTOR, dl, HiVT, StartOfHi);
+
+  Hi = DAG.getNode(ISD::STEP_VECTOR, dl, HiVT, Step);
+  Hi = DAG.getNode(ISD::ADD, dl, HiVT, Hi, StartOfHi);
+}
+
 void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo,
                                             SDValue &Hi) {
   EVT LoVT, HiVT;
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1744,6 +1744,18 @@
   return SDValue(CondCodeNodes[Cond], 0);
 }
 
+SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT, SDValue Step) {
+  if (ResVT.isScalableVector())
+    return getNode(ISD::STEP_VECTOR, DL, ResVT, Step);
+
+  EVT OpVT = Step.getValueType();
+  APInt StepVal = cast<ConstantSDNode>(Step)->getAPIntValue();
+  SmallVector<SDValue, 16> OpsStepConstants;
+  for (uint64_t i = 0; i < ResVT.getVectorNumElements(); i++)
+    OpsStepConstants.push_back(getConstant(StepVal * i, DL, OpVT));
+  return getBuildVector(ResVT, DL, OpsStepConstants);
+}
+
 /// Swaps the values of N1 and N2. Swaps all indices in the shuffle mask M that
 /// point at N1 to point at N2 and indices that point at N2 to point at N1.
 static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) {
@@ -4306,6 +4318,14 @@
   return (computeKnownBits(A).Zero | computeKnownBits(B).Zero).isAllOnesValue();
 }
 
+static SDValue FoldSTEP_VECTOR(const SDLoc &DL, EVT VT, SDValue Step,
+                               SelectionDAG &DAG) {
+  if (cast<ConstantSDNode>(Step)->isNullValue())
+    return DAG.getConstant(0, DL, VT);
+
+  return SDValue();
+}
+
 static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
                                 ArrayRef<SDValue> Ops,
                                 SelectionDAG &DAG) {
@@ -4527,6 +4547,11 @@
                   APFloat::rmNearestTiesToEven, &Ignored);
       return getConstantFP(FPV, DL, VT);
     }
+    case ISD::STEP_VECTOR: {
+      if (SDValue V = FoldSTEP_VECTOR(DL, VT, Operand, *this))
+        return V;
+      break;
+    }
     }
   }
 
@@ -4636,6 +4661,18 @@
 
   unsigned OpOpcode = Operand.getNode()->getOpcode();
   switch (Opcode) {
+  case ISD::STEP_VECTOR:
+    assert(VT.isScalableVector() &&
+           "STEP_VECTOR can only be used with scalable types");
+    assert(VT.getScalarSizeInBits() >= 8 &&
+           "STEP_VECTOR can only be used with vectors of integers that are at "
+           "least 8 bits wide");
+    assert(Operand.getValueType().bitsGE(VT.getScalarType()) &&
+           "Operand type should be at least as large as the element type");
+    assert(isa<ConstantSDNode>(Operand) &&
+           cast<ConstantSDNode>(Operand)->getAPIntValue().isNonNegative() &&
+           "Expected positive integer constant for STEP_VECTOR");
+    break;
   case ISD::FREEZE:
     assert(VT == Operand.getValueType() && "Unexpected VT!");
     break;
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -779,6 +779,7 @@
   void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
   void visitVectorReverse(const CallInst &I);
   void visitVectorSplice(const CallInst &I);
+  void visitStepVector(const CallInst &I);
 
   void visitUserOp1(const Instruction &I) {
     llvm_unreachable("UserOp1 should not exist at instruction selection time!");
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6945,7 +6945,9 @@
   case Intrinsic::experimental_deoptimize:
     LowerDeoptimizeCall(&I);
     return;
-
+  case Intrinsic::experimental_stepvector:
+    visitStepVector(I);
+    return;
   case Intrinsic::vector_reduce_fadd:
   case Intrinsic::vector_reduce_fmul:
   case Intrinsic::vector_reduce_add:
@@ -10929,6 +10931,16 @@
   }
 }
 
+void SelectionDAGBuilder::visitStepVector(const CallInst &I) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  auto DL = getCurSDLoc();
+  EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+  EVT OpVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), ResultVT.getScalarType());
+  SDValue Step = DAG.getConstant(1, DL, OpVT);
+  setValue(&I, DAG.getStepVector(DL, ResultVT, Step));
+}
+
 void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -292,6 +292,7 @@
   case ISD::SPLAT_VECTOR:               return "splat_vector";
   case ISD::SPLAT_VECTOR_PARTS:         return "splat_vector_parts";
   case ISD::VECTOR_REVERSE:             return "vector_reverse";
+  case ISD::STEP_VECTOR:                return "step_vector";
   case ISD::CARRY_FALSE:                return "carry_false";
   case ISD::ADDC:                       return "addc";
   case ISD::ADDE:                       return "adde";
Index: llvm/lib/IR/IRBuilder.cpp
===================================================================
--- llvm/lib/IR/IRBuilder.cpp
+++ llvm/lib/IR/IRBuilder.cpp
@@ -91,6 +91,23 @@
              : CreateMul(CI, Scaling);
 }
 
+Value *IRBuilderBase::CreateStepVector(Type *DstType, const Twine &Name) {
+  if (isa<ScalableVectorType>(DstType))
+    return CreateIntrinsic(Intrinsic::experimental_stepvector, {DstType}, {},
+                           nullptr, Name);
+
+  Type *STy = DstType->getScalarType();
+  unsigned NumEls = cast<FixedVectorType>(DstType)->getNumElements();
+
+  // Create a vector of consecutive numbers from zero to VF.
+  SmallVector<Constant *, 8> Indices;
+  for (unsigned i = 0; i < NumEls; ++i)
+    Indices.push_back(ConstantInt::get(STy, i));
+
+  // Add the consecutive indices to the vector value.
+  return ConstantVector::get(Indices);
+}
+
 CallInst *IRBuilderBase::CreateMemSet(Value *Ptr, Value *Val, Value *Size,
                                       MaybeAlign Align, bool isVolatile,
                                       MDNode *TBAATag, MDNode *ScopeTag,
Index: llvm/lib/IR/Verifier.cpp
===================================================================
--- llvm/lib/IR/Verifier.cpp
+++ llvm/lib/IR/Verifier.cpp
@@ -5185,6 +5185,15 @@
 
     break;
   }
+  case Intrinsic::experimental_stepvector: {
+    VectorType *VecTy = dyn_cast<VectorType>(Call.getType());
+    Assert(VecTy && VecTy->getScalarType()->isIntegerTy() &&
+               VecTy->getScalarSizeInBits() >= 8,
+           "experimental_stepvector only supported for vectors of integers "
+           "with a bitwidth of at least 8.",
+           &Call);
+    break;
+  }
   case Intrinsic::experimental_vector_insert: {
     VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
     VectorType *SubVecTy = cast<VectorType>(Call.getArgOperand(1)->getType());
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -936,6 +936,7 @@
   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
                               bool OverrideNEON = false) const;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1127,6 +1127,7 @@
       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
       setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
       setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+      setOperationAction(ISD::STEP_VECTOR, VT, Custom);
 
       setOperationAction(ISD::MULHU, VT, Expand);
       setOperationAction(ISD::MULHS, VT, Expand);
@@ -4392,6 +4393,8 @@
     return LowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::SPLAT_VECTOR:
     return LowerSPLAT_VECTOR(Op, DAG);
+  case ISD::STEP_VECTOR:
+    return LowerSTEP_VECTOR(Op, DAG);
   case ISD::EXTRACT_SUBVECTOR:
     return LowerEXTRACT_SUBVECTOR(Op, DAG);
   case ISD::INSERT_SUBVECTOR:
@@ -9039,6 +9042,21 @@
   return GenerateTBL(Op, ShuffleMask, DAG);
 }
 
+SDValue AArch64TargetLowering::LowerSTEP_VECTOR(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  EVT VT = Op.getValueType();
+  assert(VT.isScalableVector() &&
+         "Only expect scalable vectors for STEP_VECTOR");
+  EVT ElemVT = VT.getScalarType();
+  assert(ElemVT != MVT::i1 &&
+         "Vectors of i1 types not supported for STEP_VECTOR");
+
+  SDValue StepVal = Op.getOperand(0);
+  SDValue Zero = DAG.getConstant(0, dl, StepVal.getValueType());
+  return DAG.getNode(AArch64ISD::INDEX_VECTOR, dl, VT, Zero, StepVal);
+}
+
 SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
                                                  SelectionDAG &DAG) const {
   SDLoc dl(Op);
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -260,6 +260,19 @@
       return LT.first;
     break;
   }
+  case Intrinsic::experimental_stepvector: {
+    unsigned Cost = 1; // Cost of the `index' instruction
+    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+    // Legalisation of illegal vectors involves an `index' instruction plus
+    // (LT.first - 1) vector adds.
+    if (LT.first > 1) {
+      Type *LegalVTy = EVT(LT.second).getTypeForEVT(RetTy->getContext());
+      unsigned AddCost =
+          getArithmeticInstrCost(Instruction::Add, LegalVTy, CostKind);
+      Cost += AddCost * (LT.first - 1);
+    }
+    return Cost;
+  }
   default:
     break;
   }
Index: llvm/test/Analysis/CostModel/AArch64/neon-stepvector.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/CostModel/AArch64/neon-stepvector.ll
@@ -0,0 +1,34 @@
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+neon < %s | FileCheck %s
+
+; Check costs for legal integer vectors.
+define void @stepvector_legal_int() {
+; CHECK-LABEL: 'stepvector_legal_int'
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x i64> @llvm.experimental.stepvector.v2i64()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x i32> @llvm.experimental.stepvector.v4i32()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x i16> @llvm.experimental.stepvector.v8i16()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x i8> @llvm.experimental.stepvector.v16i8()
+  %1 = call <2 x i64> @llvm.experimental.stepvector.v2i64()
+  %2 = call <4 x i32> @llvm.experimental.stepvector.v4i32()
+  %3 = call <8 x i16> @llvm.experimental.stepvector.v8i16()
+  %4 = call <16 x i8> @llvm.experimental.stepvector.v16i8()
+  ret void
+}
+
+; Check cost for an illegal integer vector.
+define void @stepvector_illegal_int() {
+; CHECK-LABEL: 'stepvector_illegal_int'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <4 x i64> @llvm.experimental.stepvector.v4i64()
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <16 x i32> @llvm.experimental.stepvector.v16i32()
+  %1 = call <4 x i64> @llvm.experimental.stepvector.v4i64()
+  %2 = call <16 x i32> @llvm.experimental.stepvector.v16i32()
+  ret void
+}
+
+
+declare <2 x i64> @llvm.experimental.stepvector.v2i64()
+declare <4 x i32> @llvm.experimental.stepvector.v4i32()
+declare <8 x i16> @llvm.experimental.stepvector.v8i16()
+declare <16 x i8> @llvm.experimental.stepvector.v16i8()
+
+declare <4 x i64> @llvm.experimental.stepvector.v4i64()
+declare <16 x i32> @llvm.experimental.stepvector.v16i32()
Index: llvm/test/Analysis/CostModel/AArch64/sve-stepvector.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/CostModel/AArch64/sve-stepvector.ll
@@ -0,0 +1,39 @@
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; Check costs for legal integer vectors.
+define void @stepvector_legal_int() {
+; CHECK-LABEL: 'stepvector_legal_int'
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+  %1 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+  %2 = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+  %3 = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+  %4 = call <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+  ret void
+}
+
+; Check cost for an illegal integer vector.
+define void @stepvector_illegal_int() {
+; CHECK-LABEL: 'stepvector_illegal_int'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+  %1 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+  %2 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+  ret void
+}
+
+
+declare <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+declare <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+declare <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+
+declare <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+declare <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
Index: llvm/test/CodeGen/AArch64/neon-stepvector.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/neon-stepvector.ll
@@ -0,0 +1,181 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK
+
+; LEGAL INTEGER TYPES
+
+define <2 x i64> @stepvector_v2i64() {
+; CHECK-LABEL: .LCPI0_0:
+; CHECK-NEXT:    .xword 0
+; CHECK-NEXT:    .xword 1
+; CHECK-LABEL: stepvector_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI0_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <2 x i64> @llvm.experimental.stepvector.v2i64()
+  ret <2 x i64> %0
+}
+
+define <4 x i32> @stepvector_v4i32() {
+; CHECK-LABEL: .LCPI1_0:
+; CHECK-NEXT:    .word 0
+; CHECK-NEXT:    .word 1
+; CHECK-NEXT:    .word 2
+; CHECK-NEXT:    .word 3
+; CHECK-LABEL: stepvector_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI1_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI1_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <4 x i32> @llvm.experimental.stepvector.v4i32()
+  ret <4 x i32> %0
+}
+
+define <8 x i16> @stepvector_v8i16() {
+; CHECK-LABEL: .LCPI2_0:
+; CHECK-NEXT:    .hword 0
+; CHECK-NEXT:    .hword 1
+; CHECK-NEXT:    .hword 2
+; CHECK-NEXT:    .hword 3
+; CHECK-NEXT:    .hword 4
+; CHECK-NEXT:    .hword 5
+; CHECK-NEXT:    .hword 6
+; CHECK-NEXT:    .hword 7
+; CHECK-LABEL: stepvector_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI2_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI2_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <8 x i16> @llvm.experimental.stepvector.v8i16()
+  ret <8 x i16> %0
+}
+
+define <16 x i8> @stepvector_v16i8() {
+; CHECK-LABEL: .LCPI3_0:
+; CHECK-NEXT:    .byte 0
+; CHECK-NEXT:    .byte 1
+; CHECK-NEXT:    .byte 2
+; CHECK-NEXT:    .byte 3
+; CHECK-NEXT:    .byte 4
+; CHECK-NEXT:    .byte 5
+; CHECK-NEXT:    .byte 6
+; CHECK-NEXT:    .byte 7
+; CHECK-NEXT:    .byte 8
+; CHECK-NEXT:    .byte 9
+; CHECK-NEXT:    .byte 10
+; CHECK-NEXT:    .byte 11
+; CHECK-NEXT:    .byte 12
+; CHECK-NEXT:    .byte 13
+; CHECK-NEXT:    .byte 14
+; CHECK-NEXT:    .byte 15
+; CHECK-LABEL: stepvector_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI3_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI3_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <16 x i8> @llvm.experimental.stepvector.v16i8()
+  ret <16 x i8> %0
+}
+
+; ILLEGAL INTEGER TYPES
+
+define <4 x i64> @stepvector_v4i64() {
+; CHECK-LABEL: .LCPI4_0:
+; CHECK-NEXT:    .xword 0
+; CHECK-NEXT:    .xword 1
+; CHECK-LABEL: .LCPI4_1:
+; CHECK-NEXT:    .xword 2
+; CHECK-NEXT:    .xword 3
+; CHECK-LABEL: stepvector_v4i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI4_0
+; CHECK-NEXT:    adrp x9, .LCPI4_1
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI4_0]
+; CHECK-NEXT:    ldr q1, [x9, :lo12:.LCPI4_1]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <4 x i64> @llvm.experimental.stepvector.v4i64()
+  ret <4 x i64> %0
+}
+
+define <16 x i32> @stepvector_v16i32() {
+; CHECK-LABEL: .LCPI5_0:
+; CHECK-NEXT:    .word 0
+; CHECK-NEXT:    .word 1
+; CHECK-NEXT:    .word 2
+; CHECK-NEXT:    .word 3
+; CHECK-LABEL: .LCPI5_1:
+; CHECK-NEXT:    .word 4
+; CHECK-NEXT:    .word 5
+; CHECK-NEXT:    .word 6
+; CHECK-NEXT:    .word 7
+; CHECK-LABEL: .LCPI5_2:
+; CHECK-NEXT:    .word 8
+; CHECK-NEXT:    .word 9
+; CHECK-NEXT:    .word 10
+; CHECK-NEXT:    .word 11
+; CHECK-LABEL: .LCPI5_3:
+; CHECK-NEXT:    .word 12
+; CHECK-NEXT:    .word 13
+; CHECK-NEXT:    .word 14
+; CHECK-NEXT:    .word 15
+; CHECK-LABEL: stepvector_v16i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI5_0
+; CHECK-NEXT:    adrp x9, .LCPI5_1
+; CHECK-NEXT:    adrp x10, .LCPI5_2
+; CHECK-NEXT:    adrp x11, .LCPI5_3
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI5_0]
+; CHECK-NEXT:    ldr q1, [x9, :lo12:.LCPI5_1]
+; CHECK-NEXT:    ldr q2, [x10, :lo12:.LCPI5_2]
+; CHECK-NEXT:    ldr q3, [x11, :lo12:.LCPI5_3]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <16 x i32> @llvm.experimental.stepvector.v16i32()
+  ret <16 x i32> %0
+}
+
+define <2 x i32> @stepvector_v2i32() {
+; CHECK-LABEL: .LCPI6_0:
+; CHECK-NEXT:    .word 0
+; CHECK-NEXT:    .word 1
+; CHECK-LABEL: stepvector_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI6_0
+; CHECK-NEXT:    ldr d0, [x8, :lo12:.LCPI6_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <2 x i32> @llvm.experimental.stepvector.v2i32()
+  ret <2 x i32> %0
+}
+
+define <4 x i16> @stepvector_v4i16() {
+; CHECK-LABEL: .LCPI7_0:
+; CHECK-NEXT:    .hword 0
+; CHECK-NEXT:    .hword 1
+; CHECK-NEXT:    .hword 2
+; CHECK-NEXT:    .hword 3
+; CHECK-LABEL: stepvector_v4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI7_0
+; CHECK-NEXT:    ldr d0, [x8, :lo12:.LCPI7_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <4 x i16> @llvm.experimental.stepvector.v4i16()
+  ret <4 x i16> %0
+}
+
+
+declare <2 x i64> @llvm.experimental.stepvector.v2i64()
+declare <4 x i32> @llvm.experimental.stepvector.v4i32()
+declare <8 x i16> @llvm.experimental.stepvector.v8i16()
+declare <16 x i8> @llvm.experimental.stepvector.v16i8()
+
+declare <4 x i64> @llvm.experimental.stepvector.v4i64()
+declare <16 x i32> @llvm.experimental.stepvector.v16i32()
+declare <2 x i32> @llvm.experimental.stepvector.v2i32()
+declare <4 x i16> @llvm.experimental.stepvector.v4i16()
Index: llvm/test/CodeGen/AArch64/sve-stepvector.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-stepvector.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s --check-prefixes=CHECK
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s < %t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; LEGAL INTEGER TYPES
+
+define <vscale x 2 x i64> @stepvector_nxv2i64() {
+; CHECK-LABEL: stepvector_nxv2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 4 x i32> @stepvector_nxv4i32() {
+; CHECK-LABEL: stepvector_nxv4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 8 x i16> @stepvector_nxv8i16() {
+; CHECK-LABEL: stepvector_nxv8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.h, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 16 x i8> @stepvector_nxv16i8() {
+; CHECK-LABEL: stepvector_nxv16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.b, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+  ret <vscale x 16 x i8> %0
+}
+
+; ILLEGAL INTEGER TYPES
+
+define <vscale x 4 x i64> @stepvector_nxv4i64() {
+; CHECK-LABEL: stepvector_nxv4i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cntd x8
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    add z1.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+  ret <vscale x 4 x i64> %0
+}
+
+define <vscale x 16 x i32> @stepvector_nxv16i32() {
+; CHECK-LABEL: stepvector_nxv16i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cntw x9
+; CHECK-NEXT:    cnth x8
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    mov z1.s, w9
+; CHECK-NEXT:    mov z3.s, w8
+; CHECK-NEXT:    add z1.s, z0.s, z1.s
+; CHECK-NEXT:    add z2.s, z0.s, z3.s
+; CHECK-NEXT:    add z3.s, z1.s, z3.s
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+  ret <vscale x 16 x i32> %0
+}
+
+define <vscale x 2 x i32> @stepvector_nxv2i32() {
+; CHECK-LABEL: stepvector_nxv2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 2 x i32> @llvm.experimental.stepvector.nxv2i32()
+  ret <vscale x 2 x i32> %0
+}
+
+define <vscale x 4 x i16> @stepvector_nxv4i16() {
+; CHECK-LABEL: stepvector_nxv4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 4 x i16> @llvm.experimental.stepvector.nxv4i16()
+  ret <vscale x 4 x i16> %0
+}
+
+define <vscale x 8 x i8> @stepvector_nxv8i8() {
+; CHECK-LABEL: stepvector_nxv8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.h, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
+  ret <vscale x 8 x i8> %0
+}
+
+declare <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+declare <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+declare <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+
+declare <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+declare <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+declare <vscale x 2 x i32> @llvm.experimental.stepvector.nxv2i32()
+declare <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
+declare <vscale x 4 x i16> @llvm.experimental.stepvector.nxv4i16()
Index: llvm/test/Verifier/stepvector-intrinsic.ll
===================================================================
--- /dev/null
+++ llvm/test/Verifier/stepvector-intrinsic.ll
@@ -0,0 +1,29 @@
+; RUN: not opt -S -verify < %s 2>&1 | FileCheck %s
+
+; Reject stepvector intrinsics that return a scalar
+
+define i32 @stepvector_i32() {
+; CHECK: Intrinsic has incorrect return type!
+  %1 = call i32 @llvm.experimental.stepvector.i32()
+  ret i32 %1
+}
+
+; Reject vectors with non-integer elements
+
+define <vscale x 4 x float> @stepvector_float() {
+; CHECK: experimental_stepvector only supported for vectors of integers with a bitwidth of at least 8
+  %1 = call <vscale x 4 x float> @llvm.experimental.stepvector.nxv4f32()
+  ret <vscale x 4 x float> %1
+}
+
+; Reject vectors of integers less than 8 bits in width
+
+define <vscale x 16 x i1> @stepvector_i1() {
+; CHECK: experimental_stepvector only supported for vectors of integers with a bitwidth of at least 8
+  %1 = call <vscale x 16 x i1> @llvm.experimental.stepvector.nxv16i1()
+  ret <vscale x 16 x i1> %1
+}
+
+declare i32 @llvm.experimental.stepvector.i32()
+declare <vscale x 4 x float> @llvm.experimental.stepvector.nxv4f32()
+declare <vscale x 16 x i1> @llvm.experimental.stepvector.nxv16i1()
Index: llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
===================================================================
--- llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
+++ llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
@@ -648,4 +648,18 @@
   EXPECT_DEATH(getTypeAction(FromVT), "Cannot legalize this vector");
 }
 
+TEST_F(AArch64SelectionDAGTest, TestFold_STEP_VECTOR) {
+  if (!TM)
+    return;
+
+  SDLoc Loc;
+  auto IntVT = EVT::getIntegerVT(Context, 8);
+  auto VecVT = EVT::getVectorVT(Context, MVT::i8, 16, true);
+
+  // Should create SPLAT_VECTOR
+  SDValue Zero = DAG->getConstant(0, Loc, IntVT);
+  SDValue Op = DAG->getNode(ISD::STEP_VECTOR, Loc, VecVT, Zero);
+  EXPECT_EQ(Op.getOpcode(), ISD::SPLAT_VECTOR);
+}
+
 } // end namespace llvm
Index: llvm/unittests/IR/IRBuilderTest.cpp
===================================================================
--- llvm/unittests/IR/IRBuilderTest.cpp
+++ llvm/unittests/IR/IRBuilderTest.cpp
@@ -180,6 +180,32 @@
     EXPECT_EQ(FTy->getParamType(i), ArgTys[i]->getType());
 }
 
+TEST_F(IRBuilderTest, CreateStepVector) {
+  IRBuilder<> Builder(BB);
+
+  // Fixed width vectors
+  Type *DstVecTy = VectorType::get(Builder.getInt32Ty(), 4, false);
+  Value *StepVec = Builder.CreateStepVector(DstVecTy);
+  EXPECT_TRUE(isa<Constant>(StepVec));
+  EXPECT_EQ(StepVec->getType(), DstVecTy);
+
+  const auto *VectorValue = cast<Constant>(StepVec);
+  for (unsigned i = 0; i < 4; i++) {
+    EXPECT_TRUE(isa<ConstantInt>(VectorValue->getAggregateElement(i)));
+    ConstantInt *El = cast<ConstantInt>(VectorValue->getAggregateElement(i));
+    EXPECT_EQ(El->getValue(), i);
+  }
+
+  // Scalable vectors
+  DstVecTy = VectorType::get(Builder.getInt32Ty(), 4, true);
+  StepVec = Builder.CreateStepVector(DstVecTy);
+  EXPECT_TRUE(isa<CallInst>(StepVec));
+  CallInst *Call = cast<CallInst>(StepVec);
+  FunctionType *FTy = Call->getFunctionType();
+  EXPECT_EQ(FTy->getReturnType(), DstVecTy);
+  EXPECT_EQ(Call->getIntrinsicID(), Intrinsic::experimental_stepvector);
+}
+
 TEST_F(IRBuilderTest, ConstrainedFP) {
   IRBuilder<> Builder(BB);
   Value *V;
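For context, a short IR sketch of how a transform (for example a vectorizer) might combine the new intrinsic with a splat to form per-lane indices base + <0, 1, 2, ...> (illustrative only, not part of the diff; the function and value names are hypothetical):

; Illustrative only: per-lane indices for a scalable vector, using the
; standard insertelement/shufflevector splat idiom for the base value.
define <vscale x 4 x i32> @lane_indices(i32 %base) {
  %step = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
  %ins = insertelement <vscale x 4 x i32> undef, i32 %base, i32 0
  %splat = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %indices = add <vscale x 4 x i32> %splat, %step
  ret <vscale x 4 x i32> %indices
}

declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()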