Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -17722,6 +17722,37 @@
 mask argument does not match the pointer size of the target, the mask is
 zero-extended or truncated accordingly.
 
+.. _int_vscale:
+
+'``llvm.vscale``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i32 @llvm.vscale(i32 %scaling) readnone
+
+Arguments:
+""""""""""
+
+The argument is an i32 immediate.
+
+Overview:
+"""""""""
+
+The ``llvm.vscale`` intrinsic returns the value of ``vscale`` used by scalable
+vectors such as ``<vscale x 4 x i32>``.
+
+Semantics:
+""""""""""
+
+``vscale`` is a positive value that is constant throughout the program
+but is unknown at compile time. The ``scaling`` immediate is provided for
+convenience; it is multiplied with ``vscale`` at runtime, so the intrinsic
+returns ``%scaling * vscale``.
+
+
 Stack Map Intrinsics
 --------------------
 
@@ -17735,6 +17766,7 @@
 These intrinsics are similar to the standard library memory intrinsics except
 that they perform memory transfer as a sequence of atomic memory accesses.
 
+
 .. _int_memcpy_element_unordered_atomic:
 
 '``llvm.memcpy.element.unordered.atomic``' Intrinsic
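A minimal usage sketch (illustrative only, not part of the patch): given the
semantics above, the runtime element count of a <vscale x 4 x i32> value can
be obtained by passing its known-minimum element count as the scaling
immediate.

  define i32 @num_elements_nxv4i32() {
    ; Returns 4 * vscale, the number of i32 elements in <vscale x 4 x i32>.
    %n = call i32 @llvm.vscale(i32 4)
    ret i32 %n
  }

  declare i32 @llvm.vscale(i32)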
Index: llvm/include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -915,6 +915,12 @@
   /// known nonzero constant. The only operand here is the chain.
   GET_DYNAMIC_AREA_OFFSET,
 
+  /// VSCALE(IMM) - Returns the runtime scaling factor used to calculate the
+  /// number of elements within a scalable vector. IMM is a constant integer
+  /// multiplier that is applied to the runtime value and is usually some
+  /// multiple of MVT.getVectorNumElements().
+  VSCALE,
+
   /// Generic reduction nodes. These nodes represent horizontal vector
   /// reduction operations, producing a scalar result.
   /// The STRICT variants perform reductions in sequential order. The first
Index: llvm/include/llvm/CodeGen/SelectionDAG.h
===================================================================
--- llvm/include/llvm/CodeGen/SelectionDAG.h
+++ llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -912,6 +912,11 @@
     return getNode(ISD::UNDEF, SDLoc(), VT);
   }
 
+  /// Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
+  SDValue getVScale(const SDLoc &DL, EVT VT, int64_t MulImm = 1) {
+    return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT));
+  }
+
   /// Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
   SDValue getGLOBAL_OFFSET_TABLE(EVT VT) {
     return getNode(ISD::GLOBAL_OFFSET_TABLE, SDLoc(), VT);
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1331,6 +1331,11 @@
                              [IntrNoMem, ImmArg<1>, ImmArg<2>]>;
 
+//===---------- Intrinsics to query properties of scalable vectors --------===//
+def int_vscale : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
+
+//===----------------------------------------------------------------------===//
+
 //===----------------------------------------------------------------------===//
 // Target-specific intrinsics
 //===----------------------------------------------------------------------===//
Index: llvm/include/llvm/IR/PatternMatch.h
===================================================================
--- llvm/include/llvm/IR/PatternMatch.h
+++ llvm/include/llvm/IR/PatternMatch.h
@@ -32,6 +32,7 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
@@ -2002,6 +2003,49 @@
   return ExtractValue_match<Ind>(V);
 }
 
+/// Matches patterns for `vscale`. This can either be a call to `llvm.vscale`
+/// or the constant expression
+///  `ptrtoint(gep <vscale x 1 x i8>, <vscale x 1 x i8>* null, i32 %scaling)`
+/// under the right conditions determined by DataLayout.
+struct VScaleVal_match {
+private:
+  template <typename Base, typename Offset>
+  inline BinaryOp_match<Base, Offset, Instruction::GetElementPtr>
+  m_OffsetGep(const Base &B, const Offset &O) {
+    return BinaryOp_match<Base, Offset, Instruction::GetElementPtr>(B, O);
+  }
+
+public:
+  const DataLayout &DL;
+  int &Val;
+  VScaleVal_match(const DataLayout &DL, int &S) : DL(DL), Val(S) {}
+
+  template <typename ITy> bool match(ITy *V) {
+    uint64_t S;
+
+    if (m_Intrinsic<Intrinsic::vscale>(m_ConstantInt(S)).match(V)) {
+      Val = (int)S;
+      return true;
+    }
+
+    if (m_PtrToInt(m_OffsetGep(m_Zero(), m_ConstantInt(S))).match(V)) {
+      Type *PtrTy = cast<Operator>(V)->getOperand(0)->getType();
+      Type *DerefTy = PtrTy->getPointerElementType();
+      if (DerefTy->isVectorTy() && DerefTy->getVectorIsScalable() &&
+          DL.getTypeAllocSizeInBits(DerefTy).getKnownMinSize() == 8) {
+        Val = (int)S;
+        return true;
+      }
+    }
+
+    return false;
+  }
+};
+
+inline VScaleVal_match m_VScale(const DataLayout &DL, int &S) {
+  return VScaleVal_match(DL, S);
+}
+
 } // end namespace PatternMatch
 } // end namespace llvm
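For orientation, a sketch of the two IR forms the matcher above is intended to
recognize (illustrative only; the function names are invented). The
constant-expression form is only accepted when DataLayout gives the
<vscale x 1 x i8> type a known minimum allocation size of 8 bits, in which
case the gep index acts as the scaling factor.

  ; Form 1: a direct intrinsic call, scaling = 4.
  define i32 @vscale_x4_intrinsic() {
    %v = call i32 @llvm.vscale(i32 4)
    ret i32 %v
  }

  ; Form 2: the equivalent ptrtoint(gep null) constant expression, scaling = 4.
  define i32 @vscale_x4_constexpr() {
    ret i32 ptrtoint (<vscale x 1 x i8>* getelementptr (<vscale x 1 x i8>, <vscale x 1 x i8>* null, i32 4) to i32)
  }

  declare i32 @llvm.vscale(i32)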
Index: llvm/include/llvm/Target/TargetSelectionDAG.td
===================================================================
--- llvm/include/llvm/Target/TargetSelectionDAG.td
+++ llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -316,6 +316,7 @@
 def bb         : SDNode<"ISD::BasicBlock", SDTOther   , [], "BasicBlockSDNode">;
 def cond       : SDNode<"ISD::CONDCODE"  , SDTOther   , [], "CondCodeSDNode">;
 def undef      : SDNode<"ISD::UNDEF"     , SDTUNDEF   , []>;
+def vscale     : SDNode<"ISD::VSCALE"    , SDTIntUnaryOp, []>;
 def globaladdr : SDNode<"ISD::GlobalAddress",         SDTPtrLeaf, [],
                         "GlobalAddressSDNode">;
 def tglobaladdr : SDNode<"ISD::TargetGlobalAddress",  SDTPtrLeaf, [],
Index: llvm/lib/Analysis/ConstantFolding.cpp
===================================================================
--- llvm/lib/Analysis/ConstantFolding.cpp
+++ llvm/lib/Analysis/ConstantFolding.cpp
@@ -828,7 +828,8 @@
   Type *SrcElemTy = GEP->getSourceElementType();
   Type *ResElemTy = GEP->getResultElementType();
   Type *ResTy = GEP->getType();
-  if (!SrcElemTy->isSized())
+  if (!SrcElemTy->isSized() ||
+      (SrcElemTy->isVectorTy() && SrcElemTy->getVectorIsScalable()))
     return nullptr;
 
   if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy,
Index: llvm/lib/Analysis/ValueTracking.cpp
===================================================================
--- llvm/lib/Analysis/ValueTracking.cpp
+++ llvm/lib/Analysis/ValueTracking.cpp
@@ -1395,7 +1395,8 @@
       } else {
         // Handle array index arithmetic.
         Type *IndexedTy = GTI.getIndexedType();
-        if (!IndexedTy->isSized()) {
+        if (!IndexedTy->isSized() ||
+            (IndexedTy->isVectorTy() && IndexedTy->getVectorIsScalable())) {
           TrailZ = 0;
           break;
         }
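A short illustration of why these guards are needed (example is mine, not part
of the patch): the byte offset produced by indexing over a scalable vector
type is a multiple of vscale, so neither constant folding nor known-bits
reasoning can treat it as a fixed quantity.

  ; The offset of index 1 is sizeof(<vscale x 4 x i32>) = 16 * vscale bytes,
  ; which has no compile-time value, so the gep must not be folded into a
  ; constant byte offset.
  define <vscale x 4 x i32>* @second_chunk(<vscale x 4 x i32>* %base) {
    %p = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %base, i64 1
    ret <vscale x 4 x i32>* %p
  }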
Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2010,6 +2010,21 @@
       return despeculateCountZeros(II, TLI, DL, ModifiedDT);
     case Intrinsic::dbg_value:
       return fixupDbgValue(II);
+    case Intrinsic::vscale: {
+      // If the datalayout has no special restrictions on vector data layout,
+      // replace `llvm.vscale` by an equivalent constant expression
+      // to benefit from cheap constant propagation.
+      Type *ScalableVectorTy =
+          VectorType::get(Type::getInt8Ty(II->getContext()), 1, true);
+      if (DL->getTypeAllocSize(ScalableVectorTy).getKnownMinSize() == 8) {
+        auto Null = Constant::getNullValue(ScalableVectorTy->getPointerTo());
+        auto *CGep = ConstantExpr::getGetElementPtr(ScalableVectorTy, Null,
+                                                    II->getOperand(0));
+        II->replaceAllUsesWith(ConstantExpr::getPtrToInt(CGep, II->getType()));
+        II->eraseFromParent();
+        return true;
+      }
+    }
     }
 
     if (TLI) {
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -91,6 +91,7 @@
   case ISD::TRUNCATE:    Res = PromoteIntRes_TRUNCATE(N); break;
   case ISD::UNDEF:       Res = PromoteIntRes_UNDEF(N); break;
   case ISD::VAARG:       Res = PromoteIntRes_VAARG(N); break;
+  case ISD::VSCALE:      Res = PromoteIntRes_VSCALE(N); break;
 
   case ISD::EXTRACT_SUBVECTOR:
                          Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
@@ -1113,6 +1114,13 @@
                                            N->getValueType(0)));
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) {
+  EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+  int64_t MulImm = cast<ConstantSDNode>(N->getOperand(0))->getSExtValue();
+  return DAG.getVScale(SDLoc(N), VT, MulImm);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
   SDValue Chain = N->getOperand(0); // Get the chain.
   SDValue Ptr = N->getOperand(1);   // Get the pointer.
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -326,6 +326,7 @@
   SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
   SDValue PromoteIntRes_UNDEF(SDNode *N);
   SDValue PromoteIntRes_VAARG(SDNode *N);
+  SDValue PromoteIntRes_VSCALE(SDNode *N);
   SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
   SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
   SDValue PromoteIntRes_MULFIX(SDNode *N);
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5130,11 +5130,19 @@
     if (N2C && N2C->isNullValue())
       return N1;
     break;
+  case ISD::MUL:
+    assert(VT.isInteger() && "This operator does not apply to FP types!");
+    assert(N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    if (N2C && (N1.getOpcode() == ISD::VSCALE)) {
+      int64_t MulImm = cast<ConstantSDNode>(N1->getOperand(0))->getSExtValue();
+      return getVScale(DL, VT, MulImm * N2C->getSExtValue());
+    }
+    break;
   case ISD::UDIV:
   case ISD::UREM:
   case ISD::MULHU:
   case ISD::MULHS:
-  case ISD::MUL:
   case ISD::SDIV:
   case ISD::SREM:
   case ISD::SMIN:
@@ -5167,6 +5175,11 @@
            "Invalid FCOPYSIGN!");
     break;
   case ISD::SHL:
+    if (N2C && (N1.getOpcode() == ISD::VSCALE)) {
+      int64_t MulImm = cast<ConstantSDNode>(N1->getOperand(0))->getSExtValue();
+      return getVScale(DL, VT, MulImm << N2C->getSExtValue());
+    }
+    LLVM_FALLTHROUGH;
   case ISD::SRA:
   case ISD::SRL:
     if (SDValue V = simplifyShift(N1, N2))
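Put differently, a multiply or left shift of a VSCALE node by a constant is
folded straight into the node's immediate as the DAG is built: (mul (vscale 1),
16) becomes (vscale 16), and (shl (vscale 1), 4) likewise becomes (vscale 16).
This is what allows the `mul i32 %vscale, 16` idiom used in the tests at the
end of this patch to be selected as a single rdvl instruction.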
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1468,6 +1468,10 @@
                            TLI.getPointerTy(DAG.getDataLayout(), AS));
   }
 
+  int Scaling;
+  if (match(C, m_VScale(DAG.getDataLayout(), Scaling)))
+    return DAG.getVScale(getCurSDLoc(), VT, Scaling);
+
   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
     return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
 
@@ -5732,6 +5736,12 @@
     // By default, turn this into a target intrinsic node.
     visitTargetIntrinsic(I, Intrinsic);
     return;
+  case Intrinsic::vscale: {
+    int Scaling;
+    match(&I, m_VScale(DAG.getDataLayout(), Scaling));
+    setValue(&I, DAG.getVScale(getCurSDLoc(), MVT::i32, Scaling));
+    return;
+  }
   case Intrinsic::vastart:  visitVAStart(I); return;
   case Intrinsic::vaend:    visitVAEnd(I); return;
   case Intrinsic::vacopy:   visitVACopy(I); return;
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -170,6 +170,7 @@
   case ISD::CopyToReg:                  return "CopyToReg";
   case ISD::CopyFromReg:                return "CopyFromReg";
   case ISD::UNDEF:                      return "undef";
+  case ISD::VSCALE:                     return "vscale";
   case ISD::MERGE_VALUES:               return "merge_values";
   case ISD::INLINEASM:                  return "inlineasm";
   case ISD::INLINEASM_BR:               return "inlineasm_br";
Index: llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -62,6 +62,9 @@
                                     unsigned ConstraintID,
                                     std::vector<SDValue> &OutOps) override;
 
+  template <signed Low, signed High, signed Scale>
+  bool SelectRDVLImm(SDValue N, SDValue &Imm);
+
   bool tryMLAV64LaneV128(SDNode *N);
   bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
   bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
@@ -675,6 +678,23 @@
   return SDValue(Node, 0);
 }
 
+// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
+template <signed Low, signed High, signed Scale>
+bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
+  if (!isa<ConstantSDNode>(N))
+    return false;
+
+  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
+  if ((MulImm % std::abs(Scale)) == 0) {
+    int64_t RDVLImm = MulImm / Scale;
+    if ((RDVLImm >= Low) && (RDVLImm <= High)) {
+      Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
+      return true;
+    }
+  }
+
+  return false;
+}
 /// SelectArithExtendedRegister - Select a "extended register" operand.  This
 /// operand folds in an extend followed by an optional left shift.
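Worked example for the selection helper above (reasoning only, not part of the
patch): instantiated for CNTH as SelectRDVLImm<1, 16, 8>, a multiplier of 120
gives 120 % 8 == 0 and 120 / 8 == 15, which lies within [1, 16], so the node
is selected with an immediate of 15 (cnth x0, all, mul #15). A multiplier of 3
is rejected by every instantiation and falls back to the generic
RDVL-plus-multiply expansion, as exercised by the rdvl_3 test below.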
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -743,6 +743,7 @@
   SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -821,6 +821,9 @@
     }
   }
 
+  if (Subtarget->hasSVE())
+    setOperationAction(ISD::VSCALE, MVT::i32, Custom);
+
   setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
 }
 
@@ -3213,6 +3216,8 @@
     return LowerATOMIC_LOAD_AND(Op, DAG);
   case ISD::DYNAMIC_STACKALLOC:
     return LowerDYNAMIC_STACKALLOC(Op, DAG);
+  case ISD::VSCALE:
+    return LowerVSCALE(Op, DAG);
   }
 }
 
@@ -8522,6 +8527,16 @@
   return DAG.getMergeValues(Ops, dl);
 }
 
+SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  assert(VT != MVT::i64 && "Expected illegal VSCALE node");
+
+  SDLoc DL(Op);
+  int64_t MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getSExtValue();
+  return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm), DL, VT);
+}
+
 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
 /// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
 /// specified in the intrinsic calls.
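Background for the SVE patterns that follow (not from the patch itself): on
AArch64, rdvl xD, #imm materializes imm * 16 * vscale with imm in [-32, 31],
while cnth/cntw/cntd xD, all, mul #imm materialize imm * 8 * vscale,
imm * 4 * vscale and imm * 2 * vscale respectively with imm in [1, 16]; the
*_neg ComplexPatterns cover the negated multiples by subtracting the count
from xzr. These scales and ranges are exactly the template arguments given to
SelectRDVLImm below.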
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -46,6 +46,17 @@
 def AArch64ld1_gather_sxtw_scaled  : SDNode<"AArch64ISD::GLD1_SXTW_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
 def AArch64ld1_gather_imm          : SDNode<"AArch64ISD::GLD1_IMM", SDT_AArch64_GLD1_IMM, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
 
+// SVE CNT/INC/RDVL
+def sve_rdvl_imm : ComplexPattern<i32, 1, "SelectRDVLImm<-32, 31, 16>">;
+def sve_cnth_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 8>">;
+def sve_cntw_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 4>">;
+def sve_cntd_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 2>">;
+
+// SVE DEC
+def sve_cnth_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<-16, -1, -8>">;
+def sve_cntw_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<-16, -1, -4>">;
+def sve_cntd_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<-16, -1, -2>">;
+
 def AArch64ld1s_gather             : SDNode<"AArch64ISD::GLD1S", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
 def AArch64ld1s_gather_scaled      : SDNode<"AArch64ISD::GLD1S_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
 def AArch64ld1s_gather_uxtw        : SDNode<"AArch64ISD::GLD1S_UXTW", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
@@ -1093,6 +1104,20 @@
   def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i8),  (SXTB_ZPmZ_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
   def : Pat<(sext_inreg (nxv8i16 ZPR:$Zs), nxv8i8),  (SXTB_ZPmZ_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$Zs)>;
 
+  // General case that we ideally never want to match.
+  def : Pat<(vscale GPR64:$scale), (MADDXrrr (UBFMXri (RDVLI_XI 1), 4, 63), $scale, XZR)>;
+
+  let AddedComplexity = 5 in {
+    def : Pat<(vscale (sve_rdvl_imm i32:$imm)), (RDVLI_XI $imm)>;
+    def : Pat<(vscale (sve_cnth_imm i32:$imm)), (CNTH_XPiI 31, $imm)>;
+    def : Pat<(vscale (sve_cntw_imm i32:$imm)), (CNTW_XPiI 31, $imm)>;
+    def : Pat<(vscale (sve_cntd_imm i32:$imm)), (CNTD_XPiI 31, $imm)>;
+
+    def : Pat<(vscale (sve_cnth_imm_neg i32:$imm)), (SUBXrs XZR, (CNTH_XPiI 31, $imm), 0)>;
+    def : Pat<(vscale (sve_cntw_imm_neg i32:$imm)), (SUBXrs XZR, (CNTW_XPiI 31, $imm), 0)>;
+    def : Pat<(vscale (sve_cntd_imm_neg i32:$imm)), (SUBXrs XZR, (CNTD_XPiI 31, $imm), 0)>;
+  }
+
   def : Pat<(nxv16i8 (bitconvert (nxv8i16 ZPR:$src))), (nxv16i8 ZPR:$src)>;
   def : Pat<(nxv16i8 (bitconvert (nxv4i32 ZPR:$src))), (nxv16i8 ZPR:$src)>;
   def : Pat<(nxv16i8 (bitconvert (nxv2i64 ZPR:$src))), (nxv16i8 ZPR:$src)>;
Index: llvm/test/CodeGen/AArch64/sve-vscale.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-vscale.ll
@@ -0,0 +1,159 @@
+; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
+; RUN: opt -codegenprepare -S < %s | llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 | FileCheck %s
+
+;
+; RDVL
+;
+
+; CHECK-LABEL: rdvl:
+; CHECK:       rdvl x0, #1
+; CHECK-NEXT:  ret
+define i32 @rdvl() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, 16
+  ret i32 %1
+}
+
+; CHECK-LABEL: rdvl_arg:
+; CHECK:       rdvl x0, #1
+; CHECK-NEXT:  ret
+define i32 @rdvl_arg() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 16)
+  ret i32 %vscale
+}
+
+; CHECK-LABEL: rdvl_const:
+; CHECK:       rdvl x0, #1
+; CHECK-NEXT:  ret
+define i32 @rdvl_const() nounwind {
+  ret i32 ptrtoint (<vscale x 1 x i8>* getelementptr (<vscale x 1 x i8>, <vscale x 1 x i8>* null, i64 16) to i32)
+}
+
+; CHECK-LABEL: rdvl_3:
+; CHECK:       rdvl [[VL_B:x[0-9]+]], #1
+; CHECK-NEXT:  lsr  [[VL_Q:x[0-9]+]], [[VL_B]], #4
+; CHECK-NEXT:  mov  w[[MUL:[0-9]+]], #3
+; CHECK-NEXT:  mul  x0, [[VL_Q]], x[[MUL]]
+; CHECK-NEXT:  ret
+define i32 @rdvl_3() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, 3
+  ret i32 %1
+}
+
+; CHECK-LABEL: rdvl_min:
+; CHECK:       rdvl x0, #-32
+; CHECK-NEXT:  ret
+define i32 @rdvl_min() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, -512
+  ret i32 %1
+}
+
+; CHECK-LABEL: rdvl_max:
+; CHECK:       rdvl x0, #31
+; CHECK-NEXT:  ret
+define i32 @rdvl_max() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, 496
+  ret i32 %1
+}
+
+;
+; CNTH
+;
+
+; CHECK-LABEL: cnth:
+; CHECK:       cnth x0{{$}}
+; CHECK-NEXT:  ret
+define i32 @cnth() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, 8
+  ret i32 %1
+}
+
+; CHECK-LABEL: cnth_max:
+; CHECK:       cnth x0, all, mul #15
+; CHECK-NEXT:  ret
+define i32 @cnth_max() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, 120
+  ret i32 %1
+}
+
+; CHECK-LABEL: cnth_neg:
+; CHECK:       cnth [[CNT:x[0-9]+]]
+; CHECK:       neg x0, [[CNT]]
+; CHECK-NEXT:  ret
+define i32 @cnth_neg() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, -8
+  ret i32 %1
+}
+
+;
+; CNTW
+;
+
+; CHECK-LABEL: cntw:
+; CHECK:       cntw x0{{$}}
+; CHECK-NEXT:  ret
+define i32 @cntw() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, 4
+  ret i32 %1
+}
+
+; CHECK-LABEL: cntw_max:
+; CHECK:       cntw x0, all, mul #15
+; CHECK-NEXT:  ret
+define i32 @cntw_max() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, 60
+  ret i32 %1
+}
+
+; CHECK-LABEL: cntw_neg:
+; CHECK:       cntw [[CNT:x[0-9]+]]
+; CHECK:       neg x0, [[CNT]]
+; CHECK-NEXT:  ret
+define i32 @cntw_neg() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, -4
+  ret i32 %1
+}
+
+;
+; CNTD
+;
+
+; CHECK-LABEL: cntd:
+; CHECK:       cntd x0{{$}}
+; CHECK-NEXT:  ret
+define i32 @cntd() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, 2
+  ret i32 %1
+}
+
+; CHECK-LABEL: cntd_max:
+; CHECK:       cntd x0, all, mul #15
+; CHECK-NEXT:  ret
+define i32 @cntd_max() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, 30
+  ret i32 %1
+}
+
+; CHECK-LABEL: cntd_neg:
+; CHECK:       cntd [[CNT:x[0-9]+]]
+; CHECK:       neg x0, [[CNT]]
+; CHECK-NEXT:  ret
+define i32 @cntd_neg() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, -2
+  ret i32 %1
+}
+
+declare i32 @llvm.vscale(i32)