diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -8457,7 +8457,8 @@ case SVE::BI__builtin_sve_svlen_u64: { SVETypeFlags TF(Builtin->TypeModifier); auto VTy = cast(getSVEType(TF)); - auto NumEls = llvm::ConstantInt::get(Ty, VTy->getElementCount().Min); + auto *NumEls = + llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue()); Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty); return Builder.CreateMul(NumEls, Builder.CreateCall(F)); diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -726,7 +726,7 @@ { ASTContext::BuiltinVectorTypeInfo Info = CGM.getContext().getBuiltinVectorTypeInfo(BT); - unsigned NumElemsPerVG = (Info.EC.Min * Info.NumVectors) / 2; + unsigned NumElemsPerVG = (Info.EC.getKnownMinValue() * Info.NumVectors) / 2; // Debuggers can't extract 1bit from a vector, so will display a // bitpattern for svbool_t instead. diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -586,7 +586,8 @@ ASTContext::BuiltinVectorTypeInfo Info = Context.getBuiltinVectorTypeInfo(cast(Ty)); return llvm::ScalableVectorType::get(ConvertType(Info.ElementType), - Info.EC.Min * Info.NumVectors); + Info.EC.getKnownMinValue() * + Info.NumVectors); } case BuiltinType::Dependent: #define BUILTIN_TYPE(Id, SingletonId) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -130,8 +130,8 @@ unsigned Factor); IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, ElementCount Factor) - : IntrinsicCostAttributes(Id, CI, Factor.Min) { - assert(!Factor.Scalable); + : IntrinsicCostAttributes(Id, CI, Factor.getKnownMinValue()) { + assert(!Factor.isScalable()); } IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -115,7 +115,7 @@ Parameters.push_back( VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate})); - return {EC.Min, EC.Scalable, Parameters}; + return {EC.getKnownMinValue(), EC.isScalable(), Parameters}; } /// Sanity check on the Parameters in the VFShape. bool hasValidParameterList() const; diff --git a/llvm/include/llvm/CodeGen/ValueTypes.h b/llvm/include/llvm/CodeGen/ValueTypes.h --- a/llvm/include/llvm/CodeGen/ValueTypes.h +++ b/llvm/include/llvm/CodeGen/ValueTypes.h @@ -304,7 +304,7 @@ /// Given a vector type, return the minimum number of elements it contains. unsigned getVectorMinNumElements() const { - return getVectorElementCount().Min; + return getVectorElementCount().getKnownMinValue(); } /// Return the size of the specified value type in bits. 
@@ -383,7 +383,7 @@ EVT getHalfNumVectorElementsVT(LLVMContext &Context) const { EVT EltVT = getVectorElementType(); auto EltCnt = getVectorElementCount(); - assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!"); + assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!"); return EVT::getVectorVT(Context, EltVT, EltCnt / 2); } @@ -398,7 +398,8 @@ EVT getPow2VectorType(LLVMContext &Context) const { if (!isPow2VectorType()) { ElementCount NElts = getVectorElementCount(); - NElts.Min = 1 << Log2_32_Ceil(NElts.Min); + unsigned NewMinCount = 1 << Log2_32_Ceil(NElts.getKnownMinValue()); + NElts = ElementCount::get(NewMinCount, NElts.isScalable()); return EVT::getVectorVT(Context, getVectorElementType(), NElts); } else { diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -696,9 +696,9 @@ case Type::ScalableVectorTyID: { VectorType *VTy = cast(Ty); auto EltCnt = VTy->getElementCount(); - uint64_t MinBits = EltCnt.Min * - getTypeSizeInBits(VTy->getElementType()).getFixedSize(); - return TypeSize(MinBits, EltCnt.Scalable); + uint64_t MinBits = EltCnt.getKnownMinValue() * + getTypeSizeInBits(VTy->getElementType()).getFixedSize(); + return TypeSize(MinBits, EltCnt.isScalable()); } default: llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type"); diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h --- a/llvm/include/llvm/IR/DerivedTypes.h +++ b/llvm/include/llvm/IR/DerivedTypes.h @@ -426,16 +426,16 @@ unsigned getNumElements() const { ElementCount EC = getElementCount(); #ifdef STRICT_FIXED_SIZE_VECTORS - assert(!EC.Scalable && + assert(!EC.isScalable() && "Request for fixed number of elements from scalable vector"); - return EC.Min; + return EC.getKnownMinValue(); #else - if (EC.Scalable) + if (EC.isScalable()) WithColor::warning() << "The code that requested the fixed number of elements has made " "the assumption that this vector is not scalable. This assumption " "was not correct, and this may lead to broken code\n"; - return EC.Min; + return EC.getKnownMinValue(); #endif } @@ -512,8 +512,8 @@ /// input type and the same element type. static VectorType *getHalfElementsVectorType(VectorType *VTy) { auto EltCnt = VTy->getElementCount(); - assert ((EltCnt.Min & 1) == 0 && - "Cannot halve vector with odd number of elements."); + assert(EltCnt.isKnownEven() && + "Cannot halve vector with odd number of elements."); return VectorType::get(VTy->getElementType(), EltCnt/2); } @@ -521,7 +521,8 @@ /// input type and the same element type. 
static VectorType *getDoubleElementsVectorType(VectorType *VTy) { auto EltCnt = VTy->getElementCount(); - assert((EltCnt.Min * 2ull) <= UINT_MAX && "Too many elements in vector"); + assert((EltCnt.getKnownMinValue() * 2ull) <= UINT_MAX && + "Too many elements in vector"); return VectorType::get(VTy->getElementType(), EltCnt * 2); } diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -2046,8 +2046,9 @@ /// Examples: shufflevector <4 x n> A, <4 x n> B, <1,2,3> /// shufflevector <4 x n> A, <4 x n> B, <1,2,3,4,5> bool changesLength() const { - unsigned NumSourceElts = - cast<VectorType>(Op<0>()->getType())->getElementCount().Min; + unsigned NumSourceElts = cast<VectorType>(Op<0>()->getType()) + ->getElementCount() + .getKnownMinValue(); unsigned NumMaskElts = ShuffleMask.size(); return NumSourceElts != NumMaskElts; } diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h --- a/llvm/include/llvm/Support/MachineValueType.h +++ b/llvm/include/llvm/Support/MachineValueType.h @@ -424,7 +424,7 @@ MVT getHalfNumVectorElementsVT() const { MVT EltVT = getVectorElementType(); auto EltCnt = getVectorElementCount(); - assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!"); + assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!"); return getVectorVT(EltVT, EltCnt / 2); } @@ -742,7 +742,7 @@ /// Given a vector type, return the minimum number of elements it contains. unsigned getVectorMinNumElements() const { - return getVectorElementCount().Min; + return getVectorElementCount().getKnownMinValue(); } /// Returns the size of the specified MVT in bits. @@ -1207,9 +1207,9 @@ } static MVT getVectorVT(MVT VT, ElementCount EC) { - if (EC.Scalable) - return getScalableVectorVT(VT, EC.Min); - return getVectorVT(VT, EC.Min); + if (EC.isScalable()) + return getScalableVectorVT(VT, EC.getKnownMinValue()); + return getVectorVT(VT, EC.getKnownMinValue()); } /// Return the value type corresponding to the specified type. This returns diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h --- a/llvm/include/llvm/Support/TypeSize.h +++ b/llvm/include/llvm/Support/TypeSize.h @@ -27,6 +27,10 @@ class ElementCount { private: + unsigned Min; // Minimum number of vector elements. + bool Scalable; // If true, NumElements is a multiple of 'Min' determined + // at runtime rather than compile time. + /// Prevent code from using initializer-list contructors like /// ElementCount EC = {<unsigned>, <bool>}. The static `get*` /// methods below are preferred, as users should always make a @@ -35,10 +39,6 @@ ElementCount(unsigned Min, bool Scalable) : Min(Min), Scalable(Scalable) {} public: - unsigned Min; // Minimum number of vector elements. - bool Scalable; // If true, NumElements is a multiple of 'Min' determined - // at runtime rather than compile time. - ElementCount() = default; ElementCount operator*(unsigned RHS) { @@ -58,6 +58,16 @@ bool operator==(unsigned RHS) const { return Min == RHS && !Scalable; } bool operator!=(unsigned RHS) const { return !(*this == RHS); } + ElementCount &operator*=(unsigned RHS) { + Min *= RHS; + return *this; + } + + ElementCount &operator/=(unsigned RHS) { + Min /= RHS; + return *this; + } + ElementCount NextPowerOf2() const { return {(unsigned)llvm::NextPowerOf2(Min), Scalable}; } @@ -81,11 +91,21 @@ /// ///@{ No elements.. bool isZero() const { return Min == 0; } + /// At least one element.
+ bool isNonZero() const { return Min != 0; } + /// A return value of true indicates we know at compile time that the number + /// of elements (vscale * Min) is definitely even. However, returning false + /// does not guarantee that the total number of elements is odd. + bool isKnownEven() const { return (Min & 0x1) == 0; } /// Exactly one element. bool isScalar() const { return !Scalable && Min == 1; } /// One or more elements. bool isVector() const { return (Scalable && Min != 0) || Min > 1; } ///@} + + unsigned getKnownMinValue() const { return Min; } + + bool isScalable() const { return Scalable; } }; /// Stream operator function for `ElementCount`. @@ -322,10 +342,11 @@ return ElementCount::getFixed(~0U - 1); } static unsigned getHashValue(const ElementCount& EltCnt) { - if (EltCnt.Scalable) - return (EltCnt.Min * 37U) - 1U; + unsigned HashVal = EltCnt.getKnownMinValue() * 37U; + if (EltCnt.isScalable()) + return (HashVal - 1U); - return EltCnt.Min * 37U; + return HashVal; } static bool isEqual(const ElementCount& LHS, const ElementCount& RHS) { diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4550,7 +4550,7 @@ unsigned MaskNumElts = Mask.size(); ElementCount InVecEltCount = InVecTy->getElementCount(); - bool Scalable = InVecEltCount.Scalable; + bool Scalable = InVecEltCount.isScalable(); SmallVector Indices; Indices.assign(Mask.begin(), Mask.end()); @@ -4559,7 +4559,7 @@ // replace that input vector with undef. if (!Scalable) { bool MaskSelects0 = false, MaskSelects1 = false; - unsigned InVecNumElts = InVecEltCount.Min; + unsigned InVecNumElts = InVecEltCount.getKnownMinValue(); for (unsigned i = 0; i != MaskNumElts; ++i) { if (Indices[i] == -1) continue; @@ -4588,7 +4588,8 @@ // is not known at compile time for scalable vectors if (!Scalable && Op0Const && !Op1Const) { std::swap(Op0, Op1); - ShuffleVectorInst::commuteShuffleMask(Indices, InVecEltCount.Min); + ShuffleVectorInst::commuteShuffleMask(Indices, + InVecEltCount.getKnownMinValue()); } // A splat of an inserted scalar constant becomes a vector constant: diff --git a/llvm/lib/Analysis/VFABIDemangling.cpp b/llvm/lib/Analysis/VFABIDemangling.cpp --- a/llvm/lib/Analysis/VFABIDemangling.cpp +++ b/llvm/lib/Analysis/VFABIDemangling.cpp @@ -442,7 +442,7 @@ if (!F) return None; const ElementCount EC = getECFromSignature(F->getFunctionType()); - VF = EC.Min; + VF = EC.getKnownMinValue(); } // Sanity checks. diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4808,7 +4808,8 @@ auto *VTy = cast(Op->getOperand(0)->getType()); unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 
2 : 1; auto *Idx = dyn_cast(Op->getOperand(IdxOp)); - if (!Idx || Idx->getZExtValue() >= VTy->getElementCount().Min) + if (!Idx || + Idx->getZExtValue() >= VTy->getElementCount().getKnownMinValue()) return true; return false; } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -970,7 +970,7 @@ // VECTOR [numelts, eltty] or // [numelts, eltty, scalable] Code = bitc::TYPE_CODE_VECTOR; - TypeVals.push_back(VT->getElementCount().Min); + TypeVals.push_back(VT->getElementCount().getKnownMinValue()); TypeVals.push_back(VE.getTypeID(VT->getElementType())); if (isa(VT)) TypeVals.push_back(true); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -6957,10 +6957,10 @@ if (UseSplat) return ConstantVector::getSplat(EC, Val); - if (!EC.Scalable) { + if (!EC.isScalable()) { SmallVector ConstVec; UndefValue *UndefVal = UndefValue::get(Val->getType()); - for (unsigned Idx = 0; Idx != EC.Min; ++Idx) { + for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) { if (Idx == ExtractIdx) ConstVec.push_back(Val); else diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -18994,7 +18994,7 @@ // check the other type in the cast to make sure this is really legal. EVT VT = N->getValueType(0); EVT SrcEltVT = SrcVT.getVectorElementType(); - unsigned NumElts = SrcVT.getVectorElementCount().Min * N->getNumOperands(); + ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands(); EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); switch (CastOpcode) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -428,10 +428,10 @@ // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the // elements we want. 
if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { - assert((PartEVT.getVectorElementCount().Min > - ValueVT.getVectorElementCount().Min) && - (PartEVT.getVectorElementCount().Scalable == - ValueVT.getVectorElementCount().Scalable) && + assert((PartEVT.getVectorElementCount().getKnownMinValue() > + ValueVT.getVectorElementCount().getKnownMinValue()) && + (PartEVT.getVectorElementCount().isScalable() == + ValueVT.getVectorElementCount().isScalable()) && "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, DAG.getVectorIdxConstant(0, DL)); @@ -3751,7 +3751,7 @@ if (IsVectorGEP && !N.getValueType().isVector()) { LLVMContext &Context = *DAG.getContext(); EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount); - if (VectorElementCount.Scalable) + if (VectorElementCount.isScalable()) N = DAG.getSplatVector(VT, dl, N); else N = DAG.getSplatBuildVector(VT, dl, N); @@ -3824,7 +3824,7 @@ if (!IdxN.getValueType().isVector() && IsVectorGEP) { EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorElementCount); - if (VectorElementCount.Scalable) + if (VectorElementCount.isScalable()) IdxN = DAG.getSplatVector(VT, dl, IdxN); else IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -964,23 +964,24 @@ // Scalable vectors cannot be scalarized, so splitting or widening is // required. - if (VT.isScalableVector() && !isPowerOf2_32(EC.Min)) + if (VT.isScalableVector() && !isPowerOf2_32(EC.getKnownMinValue())) llvm_unreachable( "Splitting or widening of non-power-of-2 MVTs is not implemented."); // FIXME: We don't support non-power-of-2-sized vectors for now. // Ideally we could break down into LHS/RHS like LegalizeDAG does. - if (!isPowerOf2_32(EC.Min)) { + if (!isPowerOf2_32(EC.getKnownMinValue())) { // Split EC to unit size (scalable property is preserved). - NumVectorRegs = EC.Min; - EC = EC / NumVectorRegs; + NumVectorRegs = EC.getKnownMinValue(); + EC = ElementCount::getFixed(1); } // Divide the input until we get to a supported size. This will // always end up with an EC that represent a scalar or a scalable // scalar. - while (EC.Min > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) { - EC.Min >>= 1; + while (EC.getKnownMinValue() > 1 && + !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) { + EC /= 2; NumVectorRegs <<= 1; } @@ -1315,13 +1316,15 @@ } case TypeWidenVector: - if (isPowerOf2_32(EC.Min)) { + if (isPowerOf2_32(EC.getKnownMinValue())) { // Try to widen the vector. for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { MVT SVT = (MVT::SimpleValueType) nVT; if (SVT.getVectorElementType() == EltVT && SVT.isScalableVector() == IsScalable && - SVT.getVectorElementCount().Min > EC.Min && isTypeLegal(SVT)) { + SVT.getVectorElementCount().getKnownMinValue() > + EC.getKnownMinValue() && + isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -1365,10 +1368,10 @@ ValueTypeActions.setTypeAction(VT, TypeScalarizeVector); else if (PreferredAction == TypeSplitVector) ValueTypeActions.setTypeAction(VT, TypeSplitVector); - else if (EC.Min > 1) + else if (EC.getKnownMinValue() > 1) ValueTypeActions.setTypeAction(VT, TypeSplitVector); else - ValueTypeActions.setTypeAction(VT, EC.Scalable + ValueTypeActions.setTypeAction(VT, EC.isScalable() ? 
TypeScalarizeScalableVector : TypeScalarizeVector); } else { @@ -1426,7 +1429,8 @@ // This handles things like <2 x float> -> <4 x float> and // <4 x i1> -> <4 x i32>. LegalizeTypeAction TA = getTypeAction(Context, VT); - if (EltCnt.Min != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) { + if (EltCnt.getKnownMinValue() != 1 && + (TA == TypeWidenVector || TA == TypePromoteInteger)) { EVT RegisterEVT = getTypeToTransformTo(Context, VT); if (isTypeLegal(RegisterEVT)) { IntermediateVT = RegisterEVT; @@ -1443,7 +1447,7 @@ // Scalable vectors cannot be scalarized, so handle the legalisation of the // types like done elsewhere in SelectionDAG. - if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.Min)) { + if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.getKnownMinValue())) { LegalizeKind LK; EVT PartVT = VT; do { @@ -1452,15 +1456,15 @@ PartVT = LK.second; } while (LK.first != TypeLegal); - NumIntermediates = - VT.getVectorElementCount().Min / PartVT.getVectorElementCount().Min; + NumIntermediates = VT.getVectorElementCount().getKnownMinValue() / + PartVT.getVectorElementCount().getKnownMinValue(); // FIXME: This code needs to be extended to handle more complex vector // breakdowns, like nxv7i64 -> nxv8i64 -> 4 x nxv2i64. Currently the only // supported cases are vectors that are broken down into equal parts // such as nxv6i64 -> 3 x nxv2i64. - assert(NumIntermediates * PartVT.getVectorElementCount().Min == - VT.getVectorElementCount().Min && + assert((PartVT.getVectorElementCount() * NumIntermediates) == + VT.getVectorElementCount() && "Expected an integer multiple of PartVT"); IntermediateVT = PartVT; RegisterVT = getRegisterType(Context, IntermediateVT); @@ -1469,16 +1473,16 @@ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally // we could break down into LHS/RHS like LegalizeDAG does. - if (!isPowerOf2_32(EltCnt.Min)) { - NumVectorRegs = EltCnt.Min; - EltCnt.Min = 1; + if (!isPowerOf2_32(EltCnt.getKnownMinValue())) { + NumVectorRegs = EltCnt.getKnownMinValue(); + EltCnt = ElementCount::getFixed(1); } // Divide the input until we get to a supported size. This will always // end with a scalar if the target doesn't support vectors. - while (EltCnt.Min > 1 && + while (EltCnt.getKnownMinValue() > 1 && !isTypeLegal(EVT::getVectorVT(Context, EltTy, EltCnt))) { - EltCnt.Min >>= 1; + EltCnt /= 2; NumVectorRegs <<= 1; } diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -122,13 +122,13 @@ unsigned EVT::getExtendedVectorNumElements() const { assert(isExtended() && "Type is not extended!"); ElementCount EC = cast(LLVMTy)->getElementCount(); - if (EC.Scalable) { + if (EC.isScalable()) { WithColor::warning() << "The code that requested the fixed number of elements has made the " "assumption that this vector is not scalable. This assumption was " "not correct, and this may lead to broken code\n"; } - return EC.Min; + return EC.getKnownMinValue(); } ElementCount EVT::getExtendedVectorElementCount() const { @@ -150,9 +150,9 @@ switch (V.SimpleTy) { default: if (isVector()) - return (isScalableVector() ? "nxv" : "v") - + utostr(getVectorElementCount().Min) - + getVectorElementType().getEVTString(); + return (isScalableVector() ? 
"nxv" : "v") + + utostr(getVectorElementCount().getKnownMinValue()) + + getVectorElementType().getEVTString(); if (isInteger()) return "i" + utostr(getSizeInBits()); if (isFloatingPoint()) diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -656,9 +656,9 @@ VectorType *PTy = cast(Ty); ElementCount EC = PTy->getElementCount(); OS << "<"; - if (EC.Scalable) + if (EC.isScalable()) OS << "vscale x "; - OS << EC.Min << " x "; + OS << EC.getKnownMinValue() << " x "; print(PTy->getElementType(), OS); OS << '>'; return; diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -931,7 +931,7 @@ // If the mask is all zeros this is a splat, no need to go through all // elements. if (all_of(Mask, [](int Elt) { return Elt == 0; }) && - !MaskEltCount.Scalable) { + !MaskEltCount.isScalable()) { Type *Ty = IntegerType::get(V1->getContext(), 32); Constant *Elt = ConstantExpr::getExtractElement(V1, ConstantInt::get(Ty, 0)); @@ -942,7 +942,7 @@ if (isa(V1VTy)) return nullptr; - unsigned SrcNumElts = V1VTy->getElementCount().Min; + unsigned SrcNumElts = V1VTy->getElementCount().getKnownMinValue(); // Loop over the shuffle mask, evaluating each element. SmallVector Result; @@ -2056,11 +2056,12 @@ SmallVector ResElts; Type *Ty = IntegerType::get(C1->getContext(), 32); // Compare the elements, producing an i1 result or constant expr. - for (unsigned i = 0, e = C1VTy->getElementCount().Min; i != e; ++i) { + for (unsigned I = 0, E = C1VTy->getElementCount().getKnownMinValue(); + I != E; ++I) { Constant *C1E = - ConstantExpr::getExtractElement(C1, ConstantInt::get(Ty, i)); + ConstantExpr::getExtractElement(C1, ConstantInt::get(Ty, I)); Constant *C2E = - ConstantExpr::getExtractElement(C2, ConstantInt::get(Ty, i)); + ConstantExpr::getExtractElement(C2, ConstantInt::get(Ty, I)); ResElts.push_back(ConstantExpr::getCompare(pred, C1E, C2E)); } diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -1300,14 +1300,14 @@ } Constant *ConstantVector::getSplat(ElementCount EC, Constant *V) { - if (!EC.Scalable) { + if (!EC.isScalable()) { // If this splat is compatible with ConstantDataVector, use it instead of // ConstantVector. if ((isa(V) || isa(V)) && ConstantDataSequential::isElementTypeCompatible(V->getType())) - return ConstantDataVector::getSplat(EC.Min, V); + return ConstantDataVector::getSplat(EC.getKnownMinValue(), V); - SmallVector Elts(EC.Min, V); + SmallVector Elts(EC.getKnownMinValue(), V); return get(Elts); } @@ -1324,7 +1324,7 @@ Constant *UndefV = UndefValue::get(VTy); V = ConstantExpr::getInsertElement(UndefV, V, ConstantInt::get(I32Ty, 0)); // Build shuffle mask to perform the splat. - SmallVector Zeros(EC.Min, 0); + SmallVector Zeros(EC.getKnownMinValue(), 0); // Splat. 
return ConstantExpr::getShuffleVector(V, UndefV, Zeros); } @@ -2264,7 +2264,7 @@ if (VectorType *VecTy = dyn_cast(Idx->getType())) EltCount = VecTy->getElementCount(); - if (EltCount.Min != 0) + if (EltCount.isNonZero()) ReqTy = VectorType::get(ReqTy, EltCount); if (OnlyIfReducedTy == ReqTy) @@ -2284,7 +2284,7 @@ if (GTI.isStruct() && Idx->getType()->isVectorTy()) { Idx = Idx->getSplatValue(); - } else if (GTI.isSequential() && EltCount.Min != 0 && + } else if (GTI.isSequential() && EltCount.isNonZero() && !Idx->getType()->isVectorTy()) { Idx = ConstantVector::getSplat(EltCount, Idx); } diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -781,7 +781,7 @@ } unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy) { - return unwrap(VectorTy)->getElementCount().Min; + return unwrap(VectorTy)->getElementCount().getKnownMinValue(); } /*--.. Operations on other types ...........................................--*/ diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -630,7 +630,7 @@ // We're only calculating a natural alignment, so it doesn't have to be // based on the full size for scalable vectors. Using the minimum element // count should be enough here. - Alignment *= cast(Ty)->getElementCount().Min; + Alignment *= cast(Ty)->getElementCount().getKnownMinValue(); Alignment = PowerOf2Ceil(Alignment); return Align(Alignment); } diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -714,9 +714,10 @@ Result += "f"; } else if (VectorType* VTy = dyn_cast(Ty)) { ElementCount EC = VTy->getElementCount(); - if (EC.Scalable) + if (EC.isScalable()) Result += "nx"; - Result += "v" + utostr(EC.Min) + getMangledTypeStr(VTy->getElementType()); + Result += "v" + utostr(EC.getKnownMinValue()) + + getMangledTypeStr(VTy->getElementType()); } else if (Ty) { switch (Ty->getTypeID()) { default: llvm_unreachable("Unhandled type"); diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -1003,7 +1003,7 @@ Value *IRBuilderBase::CreateVectorSplat(ElementCount EC, Value *V, const Twine &Name) { - assert(EC.Min > 0 && "Cannot splat to an empty vector!"); + assert(EC.isNonZero() && "Cannot splat to an empty vector!"); // First insert it into an undef vector so we can shuffle it. Type *I32Ty = getInt32Ty(); diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -1967,7 +1967,8 @@ return false; // Make sure the mask elements make sense. - int V1Size = cast(V1->getType())->getElementCount().Min; + int V1Size = + cast(V1->getType())->getElementCount().getKnownMinValue(); for (int Elem : Mask) if (Elem != UndefMaskElem && Elem >= V1Size * 2) return false; @@ -2026,22 +2027,22 @@ ElementCount EC = cast(Mask->getType())->getElementCount(); if (isa(Mask)) { - Result.resize(EC.Min, 0); + Result.resize(EC.getKnownMinValue(), 0); return; } - Result.reserve(EC.Min); + Result.reserve(EC.getKnownMinValue()); - if (EC.Scalable) { + if (EC.isScalable()) { assert((isa(Mask) || isa(Mask)) && "Scalable vector shuffle mask must be undef or zeroinitializer"); int MaskVal = isa(Mask) ? 
-1 : 0; - for (unsigned I = 0; I < EC.Min; ++I) + for (unsigned I = 0; I < EC.getKnownMinValue(); ++I) Result.emplace_back(MaskVal); return; } - unsigned NumElts = EC.Min; + unsigned NumElts = EC.getKnownMinValue(); if (auto *CDS = dyn_cast(Mask)) { for (unsigned i = 0; i != NumElts; ++i) diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -280,8 +280,8 @@ // the operation. This function returns true when this is detected statically // in the IR. - // Check whether "W == vscale * EC.Min" - if (EC.Scalable) { + // Check whether "W == vscale * EC.getKnownMinValue()" + if (EC.isScalable()) { // Undig the DL auto ParMod = this->getModule(); if (!ParMod) @@ -291,8 +291,8 @@ // Compare vscale patterns uint64_t VScaleFactor; if (match(VLParam, m_c_Mul(m_ConstantInt(VScaleFactor), m_VScale(DL)))) - return VScaleFactor >= EC.Min; - return (EC.Min == 1) && match(VLParam, m_VScale(DL)); + return VScaleFactor >= EC.getKnownMinValue(); + return (EC.getKnownMinValue() == 1) && match(VLParam, m_VScale(DL)); } // standard SIMD operation @@ -301,7 +301,7 @@ return false; uint64_t VLNum = VLConst->getZExtValue(); - if (VLNum >= EC.Min) + if (VLNum >= EC.getKnownMinValue()) return true; return false; diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -128,7 +128,7 @@ ElementCount EC = VTy->getElementCount(); TypeSize ETS = VTy->getElementType()->getPrimitiveSizeInBits(); assert(!ETS.isScalable() && "Vector type should have fixed-width elements"); - return {ETS.getFixedSize() * EC.Min, EC.Scalable}; + return {ETS.getFixedSize() * EC.getKnownMinValue(), EC.isScalable()}; } default: return TypeSize::Fixed(0); } @@ -598,10 +598,10 @@ } VectorType *VectorType::get(Type *ElementType, ElementCount EC) { - if (EC.Scalable) - return ScalableVectorType::get(ElementType, EC.Min); + if (EC.isScalable()) + return ScalableVectorType::get(ElementType, EC.getKnownMinValue()); else - return FixedVectorType::get(ElementType, EC.Min); + return FixedVectorType::get(ElementType, EC.getKnownMinValue()); } bool VectorType::isValidElementType(Type *ElemTy) { diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -4827,7 +4827,8 @@ return EVT(); ElementCount EC = PredVT.getVectorElementCount(); - EVT ScalarVT = EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.Min); + EVT ScalarVT = + EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue()); EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec); return MemVT; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3532,8 +3532,9 @@ // 256 bit non-temporal stores can be lowered to STNP. Do this as part of // the custom lowering, as there are no un-paired non-temporal stores and // legalization will break up 256 bit inputs. 
+ ElementCount EC = MemVT.getVectorElementCount(); if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u && - MemVT.getVectorElementCount().Min % 2u == 0 && + EC.isKnownEven() && ((MemVT.getScalarSizeInBits() == 8u || MemVT.getScalarSizeInBits() == 16u || MemVT.getScalarSizeInBits() == 32u || @@ -3542,11 +3543,11 @@ DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl, MemVT.getHalfNumVectorElementsVT(*DAG.getContext()), StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64)); - SDValue Hi = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, Dl, - MemVT.getHalfNumVectorElementsVT(*DAG.getContext()), - StoreNode->getValue(), - DAG.getConstant(MemVT.getVectorElementCount().Min / 2, Dl, MVT::i64)); + SDValue Hi = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl, + MemVT.getHalfNumVectorElementsVT(*DAG.getContext()), + StoreNode->getValue(), + DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64)); SDValue Result = DAG.getMemIntrinsicNode( AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other), {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()}, @@ -10370,7 +10371,7 @@ {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}}; std::tie(N, Opcode) = IntrinsicMap[Intrinsic]; - assert(VT.getVectorElementCount().Min % N == 0 && + assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 && "invalid tuple vector type!"); EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), @@ -14443,7 +14444,7 @@ uint64_t IdxConst = cast(Idx)->getZExtValue(); EVT ResVT = N->getValueType(0); - uint64_t NumLanes = ResVT.getVectorElementCount().Min; + uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue(); SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL); SDValue Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx); @@ -14457,10 +14458,11 @@ SDValue Vec = N->getOperand(4); EVT TupleVT = Tuple.getValueType(); - uint64_t TupleLanes = TupleVT.getVectorElementCount().Min; + uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue(); uint64_t IdxConst = cast(Idx)->getZExtValue(); - uint64_t NumLanes = Vec.getValueType().getVectorElementCount().Min; + uint64_t NumLanes = + Vec.getValueType().getVectorElementCount().getKnownMinValue(); if ((TupleLanes % NumLanes) != 0) report_fatal_error("invalid tuple vector!"); @@ -14696,7 +14698,7 @@ ElementCount ResEC = VT.getVectorElementCount(); - if (InVT.getVectorElementCount().Min != (ResEC.Min * 2)) + if (InVT.getVectorElementCount() != (ResEC * 2)) return; auto *CIndex = dyn_cast(N->getOperand(1)); @@ -14704,7 +14706,7 @@ return; unsigned Index = CIndex->getZExtValue(); - if ((Index != 0) && (Index != ResEC.Min)) + if ((Index != 0) && (Index != ResEC.getKnownMinValue())) return; unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -340,17 +340,17 @@ auto *IndexC = dyn_cast(Index); if (IndexC) { ElementCount EC = EI.getVectorOperandType()->getElementCount(); - unsigned NumElts = EC.Min; + unsigned NumElts = EC.getKnownMinValue(); // InstSimplify should handle cases where the index is invalid. // For fixed-length vector, it's invalid to extract out-of-range element. 
- if (!EC.Scalable && IndexC->getValue().uge(NumElts)) + if (!EC.isScalable() && IndexC->getValue().uge(NumElts)) return nullptr; // This instruction only demands the single element from the input vector. // Skip for scalable type, the number of elements is unknown at // compile-time. - if (!EC.Scalable && NumElts != 1) { + if (!EC.isScalable() && NumElts != 1) { // If the input vector has a single use, simplify it based on this use // property. if (SrcVec->hasOneUse()) { diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp --- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -488,12 +488,13 @@ case Type::ScalableVectorTyID: { auto *STyL = cast(TyL); auto *STyR = cast(TyR); - if (STyL->getElementCount().Scalable != STyR->getElementCount().Scalable) - return cmpNumbers(STyL->getElementCount().Scalable, - STyR->getElementCount().Scalable); - if (STyL->getElementCount().Min != STyR->getElementCount().Min) - return cmpNumbers(STyL->getElementCount().Min, - STyR->getElementCount().Min); + if (STyL->getElementCount().isScalable() != + STyR->getElementCount().isScalable()) + return cmpNumbers(STyL->getElementCount().isScalable(), + STyR->getElementCount().isScalable()); + if (STyL->getElementCount() != STyR->getElementCount()) + return cmpNumbers(STyL->getElementCount().getKnownMinValue(), + STyR->getElementCount().getKnownMinValue()); return cmpTypes(STyL->getElementType(), STyR->getElementType()); } } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -342,7 +342,7 @@ /// type is irregular if its allocated size doesn't equal the store size of an /// element of the corresponding vector type at the given vectorization factor. static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) { - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); // Determine if an array of VF elements of type Ty is "bitcast compatible" // with a vector. if (VF.isVector()) { @@ -899,8 +899,9 @@ const DILocation *DIL = Inst->getDebugLoc(); if (DIL && Inst->getFunction()->isDebugInfoForProfiling() && !isa(Inst)) { - assert(!VF.Scalable && "scalable vectors not yet supported."); - auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(UF * VF.Min); + assert(!VF.isScalable() && "scalable vectors not yet supported."); + auto NewDIL = + DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue()); if (NewDIL) B.SetCurrentDebugLocation(NewDIL.getValue()); else @@ -1216,7 +1217,7 @@ /// width \p VF. Return CM_Unknown if this instruction did not pass /// through the cost modeling. InstWidening getWideningDecision(Instruction *I, ElementCount VF) { - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); assert(VF.isVector() && "Expected VF >=2"); // Cost model is not run in the VPlan-native path - return conservative @@ -1837,7 +1838,8 @@ // Multiply the vectorization factor by the step using integer or // floating-point arithmetic as appropriate. 
- Value *ConstVF = getSignedIntOrFpConstant(Step->getType(), VF.Min); + Value *ConstVF = + getSignedIntOrFpConstant(Step->getType(), VF.getKnownMinValue()); Value *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, Step, ConstVF)); // Create a vector splat to use in the induction update. @@ -1845,7 +1847,7 @@ // FIXME: If the step is non-constant, we create the vector splat with // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't // handle a constant vector splat. - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); Value *SplatVF = isa(Mul) ? ConstantVector::getSplat(VF, cast(Mul)) : Builder.CreateVectorSplat(VF, Mul); @@ -1982,9 +1984,10 @@ auto CreateSplatIV = [&](Value *ScalarIV, Value *Step) { Value *Broadcasted = getBroadcastInstrs(ScalarIV); for (unsigned Part = 0; Part < UF; ++Part) { - assert(!VF.Scalable && "scalable vectors not yet supported."); - Value *EntryPart = getStepVector(Broadcasted, VF.Min * Part, Step, - ID.getInductionOpcode()); + assert(!VF.isScalable() && "scalable vectors not yet supported."); + Value *EntryPart = + getStepVector(Broadcasted, VF.getKnownMinValue() * Part, Step, + ID.getInductionOpcode()); VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart); if (Trunc) addMetadata(EntryPart, Trunc); @@ -2093,7 +2096,7 @@ const InductionDescriptor &ID) { // We shouldn't have to build scalar steps if we aren't vectorizing. assert(VF.isVector() && "VF should be greater than one"); - assert(!VF.Scalable && + assert(!VF.isScalable() && "the code below assumes a fixed number of elements at compile time"); // Get the value type and ensure it and the step have the same integer type. Type *ScalarIVTy = ScalarIV->getType()->getScalarType(); @@ -2118,12 +2121,12 @@ unsigned Lanes = Cost->isUniformAfterVectorization(cast(EntryVal), VF) ? 1 - : VF.Min; + : VF.getKnownMinValue(); // Compute the scalar steps and save the results in VectorLoopValueMap. for (unsigned Part = 0; Part < UF; ++Part) { for (unsigned Lane = 0; Lane < Lanes; ++Lane) { - auto *StartIdx = - getSignedIntOrFpConstant(ScalarIVTy, VF.Min * Part + Lane); + auto *StartIdx = getSignedIntOrFpConstant( + ScalarIVTy, VF.getKnownMinValue() * Part + Lane); auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step)); auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul)); VectorLoopValueMap.setScalarValue(EntryVal, {Part, Lane}, Add); @@ -2166,9 +2169,10 @@ // is known to be uniform after vectorization, this corresponds to lane zero // of the Part unroll iteration. Otherwise, the last instruction is the one // we created for the last vector lane of the Part unroll iteration. - assert(!VF.Scalable && "scalable vectors not yet supported."); - unsigned LastLane = - Cost->isUniformAfterVectorization(I, VF) ? 0 : VF.Min - 1; + assert(!VF.isScalable() && "scalable vectors not yet supported."); + unsigned LastLane = Cost->isUniformAfterVectorization(I, VF) + ? 0 + : VF.getKnownMinValue() - 1; auto *LastInst = cast( VectorLoopValueMap.getScalarValue(V, {Part, LastLane})); @@ -2190,10 +2194,10 @@ VectorLoopValueMap.setVectorValue(V, Part, VectorValue); } else { // Initialize packing with insertelements to start from undef. 
- assert(!VF.Scalable && "VF is assumed to be non scalable."); + assert(!VF.isScalable() && "VF is assumed to be non scalable."); Value *Undef = UndefValue::get(VectorType::get(V->getType(), VF)); VectorLoopValueMap.setVectorValue(V, Part, Undef); - for (unsigned Lane = 0; Lane < VF.Min; ++Lane) + for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane) packScalarIntoVectorValue(V, {Part, Lane}); VectorValue = VectorLoopValueMap.getVectorValue(V, Part); } @@ -2257,10 +2261,10 @@ Value *InnerLoopVectorizer::reverseVector(Value *Vec) { assert(Vec->getType()->isVectorTy() && "Invalid type"); - assert(!VF.Scalable && "Cannot reverse scalable vectors"); + assert(!VF.isScalable() && "Cannot reverse scalable vectors"); SmallVector ShuffleMask; - for (unsigned i = 0; i < VF.Min; ++i) - ShuffleMask.push_back(VF.Min - i - 1); + for (unsigned i = 0; i < VF.getKnownMinValue(); ++i) + ShuffleMask.push_back(VF.getKnownMinValue() - i - 1); return Builder.CreateShuffleVector(Vec, UndefValue::get(Vec->getType()), ShuffleMask, "reverse"); @@ -2314,7 +2318,7 @@ // Prepare for the vector type of the interleaved load/store. Type *ScalarTy = getMemInstValueType(Instr); unsigned InterleaveFactor = Group->getFactor(); - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); auto *VecTy = VectorType::get(ScalarTy, VF * InterleaveFactor); // Prepare for the new pointers. @@ -2331,10 +2335,10 @@ // pointer operand of the interleaved access is supposed to be uniform. For // uniform instructions, we're only required to generate a value for the // first vector lane in each unroll iteration. - assert(!VF.Scalable && + assert(!VF.isScalable() && "scalable vector reverse operation is not implemented"); if (Group->isReverse()) - Index += (VF.Min - 1) * Group->getFactor(); + Index += (VF.getKnownMinValue() - 1) * Group->getFactor(); for (unsigned Part = 0; Part < UF; Part++) { Value *AddrPart = State.get(Addr, {Part, 0}); @@ -2369,8 +2373,8 @@ Value *MaskForGaps = nullptr; if (Group->requiresScalarEpilogue() && !Cost->isScalarEpilogueAllowed()) { - assert(!VF.Scalable && "scalable vectors not yet supported."); - MaskForGaps = createBitMaskForGaps(Builder, VF.Min, *Group); + assert(!VF.isScalable() && "scalable vectors not yet supported."); + MaskForGaps = createBitMaskForGaps(Builder, VF.getKnownMinValue(), *Group); assert(MaskForGaps && "Mask for Gaps is required but it is null"); } @@ -2387,10 +2391,10 @@ if (BlockInMask) { Value *BlockInMaskPart = State.get(BlockInMask, Part); auto *Undefs = UndefValue::get(BlockInMaskPart->getType()); - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); Value *ShuffledMask = Builder.CreateShuffleVector( BlockInMaskPart, Undefs, - createReplicatedMask(InterleaveFactor, VF.Min), + createReplicatedMask(InterleaveFactor, VF.getKnownMinValue()), "interleaved.mask"); GroupMask = MaskForGaps ? 
Builder.CreateBinOp(Instruction::And, ShuffledMask, @@ -2417,15 +2421,16 @@ if (!Member) continue; - assert(!VF.Scalable && "scalable vectors not yet supported."); - auto StrideMask = createStrideMask(I, InterleaveFactor, VF.Min); + assert(!VF.isScalable() && "scalable vectors not yet supported."); + auto StrideMask = + createStrideMask(I, InterleaveFactor, VF.getKnownMinValue()); for (unsigned Part = 0; Part < UF; Part++) { Value *StridedVec = Builder.CreateShuffleVector( NewLoads[Part], UndefVec, StrideMask, "strided.vec"); // If this member has different type, cast the result type. if (Member->getType() != ScalarTy) { - assert(!VF.Scalable && "VF is assumed to be non scalable."); + assert(!VF.isScalable() && "VF is assumed to be non scalable."); VectorType *OtherVTy = VectorType::get(Member->getType(), VF); StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL); } @@ -2440,7 +2445,7 @@ } // The sub vector type for current instruction. - assert(!VF.Scalable && "VF is assumed to be non scalable."); + assert(!VF.isScalable() && "VF is assumed to be non scalable."); auto *SubVT = VectorType::get(ScalarTy, VF); // Vectorize the interleaved store group. @@ -2469,9 +2474,10 @@ Value *WideVec = concatenateVectors(Builder, StoredVecs); // Interleave the elements in the wide vector. - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); Value *IVec = Builder.CreateShuffleVector( - WideVec, UndefVec, createInterleaveMask(VF.Min, InterleaveFactor), + WideVec, UndefVec, + createInterleaveMask(VF.getKnownMinValue(), InterleaveFactor), "interleaved.vec"); Instruction *NewStoreInstr; @@ -2480,7 +2486,8 @@ auto *Undefs = UndefValue::get(BlockInMaskPart->getType()); Value *ShuffledMask = Builder.CreateShuffleVector( BlockInMaskPart, Undefs, - createReplicatedMask(InterleaveFactor, VF.Min), "interleaved.mask"); + createReplicatedMask(InterleaveFactor, VF.getKnownMinValue()), + "interleaved.mask"); NewStoreInstr = Builder.CreateMaskedStore( IVec, AddrParts[Part], Group->getAlign(), ShuffledMask); } @@ -2514,7 +2521,7 @@ Type *ScalarDataTy = getMemInstValueType(Instr); - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); auto *DataTy = VectorType::get(ScalarDataTy, VF); const Align Alignment = getLoadStoreAlignment(Instr); @@ -2550,16 +2557,16 @@ // If the address is consecutive but reversed, then the // wide store needs to start at the last vector element. PartPtr = cast(Builder.CreateGEP( - ScalarDataTy, Ptr, Builder.getInt32(-Part * VF.Min))); + ScalarDataTy, Ptr, Builder.getInt32(-Part * VF.getKnownMinValue()))); PartPtr->setIsInBounds(InBounds); PartPtr = cast(Builder.CreateGEP( - ScalarDataTy, PartPtr, Builder.getInt32(1 - VF.Min))); + ScalarDataTy, PartPtr, Builder.getInt32(1 - VF.getKnownMinValue()))); PartPtr->setIsInBounds(InBounds); if (isMaskRequired) // Reverse of a null all-one mask is a null mask. BlockInMaskParts[Part] = reverseVector(BlockInMaskParts[Part]); } else { PartPtr = cast(Builder.CreateGEP( - ScalarDataTy, Ptr, Builder.getInt32(Part * VF.Min))); + ScalarDataTy, Ptr, Builder.getInt32(Part * VF.getKnownMinValue()))); PartPtr->setIsInBounds(InBounds); } @@ -2756,8 +2763,8 @@ Type *Ty = TC->getType(); // This is where we can make the step a runtime constant. 
- assert(!VF.Scalable && "scalable vectorization is not supported yet"); - Constant *Step = ConstantInt::get(Ty, VF.Min * UF); + assert(!VF.isScalable() && "scalable vectorization is not supported yet"); + Constant *Step = ConstantInt::get(Ty, VF.getKnownMinValue() * UF); // If the tail is to be folded by masking, round the number of iterations N // up to a multiple of Step instead of rounding down. This is done by first @@ -2766,10 +2773,10 @@ // that it starts at zero and its Step is a power of two; the loop will then // exit, with the last early-exit vector comparison also producing all-true. if (Cost->foldTailByMasking()) { - assert(isPowerOf2_32(VF.Min * UF) && + assert(isPowerOf2_32(VF.getKnownMinValue() * UF) && "VF*UF must be a power of 2 when folding tail by masking"); - TC = Builder.CreateAdd(TC, ConstantInt::get(Ty, VF.Min * UF - 1), - "n.rnd.up"); + TC = Builder.CreateAdd( + TC, ConstantInt::get(Ty, VF.getKnownMinValue() * UF - 1), "n.rnd.up"); } // Now we need to generate the expression for the part of the loop that the @@ -2846,9 +2853,10 @@ // If tail is to be folded, vector loop takes care of all iterations. Value *CheckMinIters = Builder.getFalse(); if (!Cost->foldTailByMasking()) { - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); CheckMinIters = Builder.CreateICmp( - P, Count, ConstantInt::get(Count->getType(), VF.Min * UF), + P, Count, + ConstantInt::get(Count->getType(), VF.getKnownMinValue() * UF), "min.iters.check"); } // Create new preheader for vector loop. @@ -3303,8 +3311,8 @@ Value *StartIdx = ConstantInt::get(IdxTy, 0); // The loop step is equal to the vectorization factor (num of SIMD elements) // times the unroll factor (num of SIMD instructions). - assert(!VF.Scalable && "scalable vectors not yet supported."); - Constant *Step = ConstantInt::get(IdxTy, VF.Min * UF); + assert(!VF.isScalable() && "scalable vectors not yet supported."); + Constant *Step = ConstantInt::get(IdxTy, VF.getKnownMinValue() * UF); Value *CountRoundDown = getOrCreateVectorTripCount(Lp); Induction = createInductionVariable(Lp, StartIdx, CountRoundDown, Step, @@ -3438,7 +3446,7 @@ unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI, ElementCount VF, bool &NeedToScalarize) { - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); Function *F = CI->getCalledFunction(); Type *ScalarRetTy = CI->getType(); SmallVector Tys, ScalarTys; @@ -3463,7 +3471,7 @@ // packing the return values to a vector. unsigned ScalarizationCost = getScalarizationOverhead(CI, VF); - unsigned Cost = ScalarCallCost * VF.Min + ScalarizationCost; + unsigned Cost = ScalarCallCost * VF.getKnownMinValue() + ScalarizationCost; // If we can't emit a vector call for this function, then the currently found // cost is the cost we need to return. @@ -3684,11 +3692,11 @@ // profile is not inherently precise anyway. Note also possible bypass of // vector code caused by legality checks is ignored, assigning all the weight // to the vector loop, optimistically. 
- assert(!VF.Scalable && + assert(!VF.isScalable() && "cannot use scalable ElementCount to determine unroll factor"); - setProfileInfoAfterUnrolling(LI->getLoopFor(LoopScalarBody), - LI->getLoopFor(LoopVectorBody), - LI->getLoopFor(LoopScalarBody), VF.Min * UF); + setProfileInfoAfterUnrolling( + LI->getLoopFor(LoopScalarBody), LI->getLoopFor(LoopVectorBody), + LI->getLoopFor(LoopScalarBody), VF.getKnownMinValue() * UF); } void InnerLoopVectorizer::fixCrossIterationPHIs() { @@ -3769,10 +3777,10 @@ auto *VectorInit = ScalarInit; if (VF.isVector()) { Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); - assert(!VF.Scalable && "VF is assumed to be non scalable."); + assert(!VF.isScalable() && "VF is assumed to be non scalable."); VectorInit = Builder.CreateInsertElement( UndefValue::get(VectorType::get(VectorInit->getType(), VF)), VectorInit, - Builder.getInt32(VF.Min - 1), "vector.recur.init"); + Builder.getInt32(VF.getKnownMinValue() - 1), "vector.recur.init"); } // We constructed a temporary phi node in the first phase of vectorization. @@ -3813,11 +3821,11 @@ // We will construct a vector for the recurrence by combining the values for // the current and previous iterations. This is the required shuffle mask. - assert(!VF.Scalable); - SmallVector ShuffleMask(VF.Min); - ShuffleMask[0] = VF.Min - 1; - for (unsigned I = 1; I < VF.Min; ++I) - ShuffleMask[I] = I + VF.Min - 1; + assert(!VF.isScalable()); + SmallVector ShuffleMask(VF.getKnownMinValue()); + ShuffleMask[0] = VF.getKnownMinValue() - 1; + for (unsigned I = 1; I < VF.getKnownMinValue(); ++I) + ShuffleMask[I] = I + VF.getKnownMinValue() - 1; // The vector from which to take the initial value for the current iteration // (actual or unrolled). Initially, this is the vector phi node. @@ -3846,7 +3854,8 @@ if (VF.isVector()) { Builder.SetInsertPoint(LoopMiddleBlock->getTerminator()); ExtractForScalar = Builder.CreateExtractElement( - ExtractForScalar, Builder.getInt32(VF.Min - 1), "vector.recur.extract"); + ExtractForScalar, Builder.getInt32(VF.getKnownMinValue() - 1), + "vector.recur.extract"); } // Extract the second last element in the middle block if the // Phi is used outside the loop. We need to extract the phi itself @@ -3856,7 +3865,8 @@ Value *ExtractForPhiUsedOutsideLoop = nullptr; if (VF.isVector()) ExtractForPhiUsedOutsideLoop = Builder.CreateExtractElement( - Incoming, Builder.getInt32(VF.Min - 2), "vector.recur.extract.for.phi"); + Incoming, Builder.getInt32(VF.getKnownMinValue() - 2), + "vector.recur.extract.for.phi"); // When loop is unrolled without vectorizing, initialize // ExtractForPhiUsedOutsideLoop with the value just prior to unrolled value of // `Incoming`. This is analogous to the vectorized case above: extracting the @@ -4013,7 +4023,7 @@ // entire expression in the smaller type. 
if (VF.isVector() && Phi->getType() != RdxDesc.getRecurrenceType()) { assert(!IsInLoopReductionPhi && "Unexpected truncated inloop reduction!"); - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF); Builder.SetInsertPoint( LI->getLoopFor(LoopVectorBody)->getLoopLatch()->getTerminator()); @@ -4145,7 +4155,7 @@ } void InnerLoopVectorizer::fixLCSSAPHIs() { - assert(!VF.Scalable && "the code below assumes fixed width vectors"); + assert(!VF.isScalable() && "the code below assumes fixed width vectors"); for (PHINode &LCSSAPhi : LoopExitBlock->phis()) { if (LCSSAPhi.getNumIncomingValues() == 1) { auto *IncomingValue = LCSSAPhi.getIncomingValue(0); @@ -4155,7 +4165,7 @@ LastLane = Cost->isUniformAfterVectorization( cast(IncomingValue), VF) ? 0 - : VF.Min - 1; + : VF.getKnownMinValue() - 1; // Can be a loop invariant incoming value or the last scalar value to be // extracted from the vectorized loop. Builder.SetInsertPoint(LoopMiddleBlock->getTerminator()); @@ -4338,7 +4348,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF, ElementCount VF) { - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); PHINode *P = cast(PN); if (EnableVPlanNativePath) { // Currently we enter here in the VPlan-native path for non-induction @@ -4403,11 +4413,12 @@ // Determine the number of scalars we need to generate for each unroll // iteration. If the instruction is uniform, we only need to generate the // first lane. Otherwise, we generate all VF values. - unsigned Lanes = Cost->isUniformAfterVectorization(P, VF) ? 1 : VF.Min; + unsigned Lanes = + Cost->isUniformAfterVectorization(P, VF) ? 1 : VF.getKnownMinValue(); for (unsigned Part = 0; Part < UF; ++Part) { for (unsigned Lane = 0; Lane < Lanes; ++Lane) { - Constant *Idx = - ConstantInt::get(PtrInd->getType(), Lane + Part * VF.Min); + Constant *Idx = ConstantInt::get(PtrInd->getType(), + Lane + Part * VF.getKnownMinValue()); Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); Value *SclrGep = emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II); @@ -4437,8 +4448,9 @@ Exp.expandCodeFor(ScalarStep, PhiType, InductionLoc); Value *InductionGEP = GetElementPtrInst::Create( ScStValueType->getPointerElementType(), NewPointerPhi, - Builder.CreateMul(ScalarStepValue, - ConstantInt::get(PhiType, VF.Min * UF)), + Builder.CreateMul( + ScalarStepValue, + ConstantInt::get(PhiType, VF.getKnownMinValue() * UF)), "ptr.ind", InductionLoc); NewPointerPhi->addIncoming(InductionGEP, LoopLatch); @@ -4448,15 +4460,17 @@ for (unsigned Part = 0; Part < UF; ++Part) { SmallVector Indices; // Create a vector of consecutive numbers from zero to VF. 
- for (unsigned i = 0; i < VF.Min; ++i) - Indices.push_back(ConstantInt::get(PhiType, i + Part * VF.Min)); + for (unsigned i = 0; i < VF.getKnownMinValue(); ++i) + Indices.push_back( + ConstantInt::get(PhiType, i + Part * VF.getKnownMinValue())); Constant *StartOffset = ConstantVector::get(Indices); Value *GEP = Builder.CreateGEP( ScStValueType->getPointerElementType(), NewPointerPhi, - Builder.CreateMul(StartOffset, - Builder.CreateVectorSplat(VF.Min, ScalarStepValue), - "vector.gep")); + Builder.CreateMul( + StartOffset, + Builder.CreateVectorSplat(VF.getKnownMinValue(), ScalarStepValue), + "vector.gep")); VectorLoopValueMap.setVectorValue(P, Part, GEP); } } @@ -4483,7 +4497,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPUser &User, VPTransformState &State) { - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); switch (I.getOpcode()) { case Instruction::Call: case Instruction::Br: @@ -4571,7 +4585,7 @@ setDebugLocFromInst(Builder, CI); /// Vectorize casts. - assert(!VF.Scalable && "VF is assumed to be non scalable."); + assert(!VF.isScalable() && "VF is assumed to be non scalable."); Type *DestTy = (VF.isScalar()) ? CI->getType() : VectorType::get(CI->getType(), VF); @@ -4601,7 +4615,7 @@ SmallVector Tys; for (Value *ArgOperand : CI->arg_operands()) - Tys.push_back(ToVectorTy(ArgOperand->getType(), VF.Min)); + Tys.push_back(ToVectorTy(ArgOperand->getType(), VF.getKnownMinValue())); Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); @@ -4633,7 +4647,7 @@ // Use vector version of the intrinsic. Type *TysForDecl[] = {CI->getType()}; if (VF.isVector()) { - assert(!VF.Scalable && "VF is assumed to be non scalable."); + assert(!VF.isScalable() && "VF is assumed to be non scalable."); TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF); } VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl); @@ -4872,7 +4886,7 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, ElementCount VF) { - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); if (!blockNeedsPredication(I->getParent())) return false; switch(I->getOpcode()) { @@ -5357,7 +5371,7 @@ Selected = false; } if (Selected) { - MaxVF = VFs[i].Min; + MaxVF = VFs[i].getKnownMinValue(); break; } } @@ -5558,8 +5572,9 @@ } // Clamp the interleave ranges to reasonable counts. - assert(!VF.Scalable && "scalable vectors not yet supported."); - unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF.Min); + assert(!VF.isScalable() && "scalable vectors not yet supported."); + unsigned MaxInterleaveCount = + TTI.getMaxInterleaveFactor(VF.getKnownMinValue()); // Check if the user has overridden the max. if (VF == 1) { @@ -5573,7 +5588,8 @@ // If trip count is known or estimated compile time constant, limit the // interleave count to be less than the trip count divided by VF. 
   if (BestKnownTC) {
-    MaxInterleaveCount = std::min(*BestKnownTC / VF.Min, MaxInterleaveCount);
+    MaxInterleaveCount =
+        std::min(*BestKnownTC / VF.getKnownMinValue(), MaxInterleaveCount);
   }
 
   // If we did not calculate the cost for VF (because the user selected the VF)
@@ -5745,8 +5761,9 @@
     if (Ty->isTokenTy())
       return 0U;
     unsigned TypeSize = DL.getTypeSizeInBits(Ty->getScalarType());
-    assert(!VF.Scalable && "scalable vectors not yet supported.");
-    return std::max(1, VF.Min * TypeSize / WidestRegister);
+    assert(!VF.isScalable() && "scalable vectors not yet supported.");
+    return std::max(1, VF.getKnownMinValue() * TypeSize /
+                           WidestRegister);
   };
 
   for (unsigned int i = 0, s = IdxToInstr.size(); i < s; ++i) {
@@ -5973,19 +5990,20 @@
   // the instruction as if it wasn't if-converted and instead remained in the
   // predicated block. We will scale this cost by block probability after
   // computing the scalarization overhead.
-  assert(!VF.Scalable && "scalable vectors not yet supported.");
+  assert(!VF.isScalable() && "scalable vectors not yet supported.");
   unsigned ScalarCost =
-      VF.Min * getInstructionCost(I, ElementCount::getFixed(1)).first;
+      VF.getKnownMinValue() *
+      getInstructionCost(I, ElementCount::getFixed(1)).first;
 
   // Compute the scalarization overhead of needed insertelement instructions
   // and phi nodes.
   if (isScalarWithPredication(I) && !I->getType()->isVoidTy()) {
     ScalarCost += TTI.getScalarizationOverhead(
         cast(ToVectorTy(I->getType(), VF)),
-        APInt::getAllOnesValue(VF.Min), true, false);
-    assert(!VF.Scalable && "scalable vectors not yet supported.");
+        APInt::getAllOnesValue(VF.getKnownMinValue()), true, false);
+    assert(!VF.isScalable() && "scalable vectors not yet supported.");
     ScalarCost +=
-        VF.Min *
+        VF.getKnownMinValue() *
         TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput);
   }
@@ -6000,10 +6018,10 @@
         if (canBeScalarized(J))
           Worklist.push_back(J);
         else if (needsExtract(J, VF)) {
-          assert(!VF.Scalable && "scalable vectors not yet supported.");
+          assert(!VF.isScalable() && "scalable vectors not yet supported.");
           ScalarCost += TTI.getScalarizationOverhead(
               cast(ToVectorTy(J->getType(), VF)),
-              APInt::getAllOnesValue(VF.Min), false, true);
+              APInt::getAllOnesValue(VF.getKnownMinValue()), false, true);
         }
       }
@@ -6021,7 +6039,7 @@
 
 LoopVectorizationCostModel::VectorizationCostTy
 LoopVectorizationCostModel::expectedCost(ElementCount VF) {
-  assert(!VF.Scalable && "scalable vectors not yet supported.");
+  assert(!VF.isScalable() && "scalable vectors not yet supported.");
   VectorizationCostTy Cost;
 
   // For each block.
@@ -6104,7 +6122,7 @@
                                                           ElementCount VF) {
   assert(VF.isVector() &&
          "Scalarization cost of instruction implies vectorization.");
-  assert(!VF.Scalable && "scalable vectors not yet supported.");
+  assert(!VF.isScalable() && "scalable vectors not yet supported.");
   Type *ValTy = getMemInstValueType(I);
   auto SE = PSE.getSE();
@@ -6117,12 +6135,13 @@
   const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, PSE, TheLoop);
 
   // Get the cost of the scalar memory instruction and address computation.
-  unsigned Cost = VF.Min * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
+  unsigned Cost =
+      VF.getKnownMinValue() * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
 
   // Don't pass *I here, since it is scalar but will actually be part of a
   // vectorized loop where the user of it is a vectorized instruction.
   const Align Alignment = getLoadStoreAlignment(I);
-  Cost += VF.Min *
+  Cost += VF.getKnownMinValue() *
           TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
                               Alignment, AS, TTI::TCK_RecipThroughput);
@@ -6190,9 +6209,10 @@
     return TTI.getAddressComputationCost(ValTy) +
            TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
                                CostKind) +
-           (isLoopInvariantStoreValue ? 0 : TTI.getVectorInstrCost(
-                                                Instruction::ExtractElement,
-                                                VectorTy, VF.Min - 1));
+           (isLoopInvariantStoreValue
+                ? 0
+                : TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
+                                         VF.getKnownMinValue() - 1));
 }
 
 unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
@@ -6218,7 +6238,7 @@
   assert(Group && "Fail to get an interleaved access group.");
 
   unsigned InterleaveFactor = Group->getFactor();
-  assert(!VF.Scalable && "scalable vectors not yet supported.");
+  assert(!VF.isScalable() && "scalable vectors not yet supported.");
   auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
 
   // Holds the indices of existing members in an interleaved load group.
@@ -6266,7 +6286,7 @@
 LoopVectorizationCostModel::VectorizationCostTy
 LoopVectorizationCostModel::getInstructionCost(Instruction *I,
                                                ElementCount VF) {
-  assert(!VF.Scalable &&
+  assert(!VF.isScalable() &&
          "the cost model is not yet implemented for scalable vectorization");
   // If we know that this instruction will remain uniform, check the cost of
   // the scalar version.
@@ -6282,22 +6302,24 @@
     auto InstSet = ForcedScalar->second;
     if (InstSet.count(I))
       return VectorizationCostTy(
-          (getInstructionCost(I, ElementCount::getFixed(1)).first * VF.Min),
+          (getInstructionCost(I, ElementCount::getFixed(1)).first *
+           VF.getKnownMinValue()),
           false);
   }
 
   Type *VectorTy;
   unsigned C = getInstructionCost(I, VF, VectorTy);
 
-  bool TypeNotScalarized = VF.isVector() && VectorTy->isVectorTy() &&
-                           TTI.getNumberOfParts(VectorTy) < VF.Min;
+  bool TypeNotScalarized =
+      VF.isVector() && VectorTy->isVectorTy() &&
+      TTI.getNumberOfParts(VectorTy) < VF.getKnownMinValue();
   return VectorizationCostTy(C, TypeNotScalarized);
 }
 
 unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
                                                               ElementCount VF) {
-  assert(!VF.Scalable &&
+  assert(!VF.isScalable() &&
          "cannot compute scalarization overhead for scalable vectorization");
   if (VF.isScalar())
     return 0;
@@ -6307,7 +6329,8 @@
   if (!RetTy->isVoidTy() &&
       (!isa(I) || !TTI.supportsEfficientVectorElementLoadStore()))
     Cost += TTI.getScalarizationOverhead(
-        cast(RetTy), APInt::getAllOnesValue(VF.Min), true, false);
+        cast(RetTy), APInt::getAllOnesValue(VF.getKnownMinValue()),
+        true, false);
 
   // Some targets keep addresses scalar.
   if (isa(I) && !TTI.prefersVectorizedAddressing())
@@ -6323,13 +6346,12 @@
   // Skip operands that do not require extraction/scalarization and do not incur
   // any overhead.
-  return Cost +
-         TTI.getOperandsScalarizationOverhead(filterExtractingOperands(Ops, VF),
-                                              VF.Min);
+  return Cost + TTI.getOperandsScalarizationOverhead(
+                    filterExtractingOperands(Ops, VF), VF.getKnownMinValue());
 }
 
 void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
-  assert(!VF.Scalable && "scalable vectors not yet supported.");
+  assert(!VF.isScalable() && "scalable vectors not yet supported.");
   if (VF.isScalar())
     return;
   NumPredStores = 0;
@@ -6466,14 +6488,15 @@
         // Scalarize a widened load of address.
           setWideningDecision(
               I, VF, CM_Scalarize,
-              (VF.Min * getMemoryInstructionCost(I, ElementCount::getFixed(1))));
+              (VF.getKnownMinValue() *
+               getMemoryInstructionCost(I, ElementCount::getFixed(1))));
         else if (auto Group = getInterleavedAccessGroup(I)) {
           // Scalarize an interleave group of address loads.
           for (unsigned I = 0; I < Group->getFactor(); ++I) {
             if (Instruction *Member = Group->getMember(I))
               setWideningDecision(
                   Member, VF, CM_Scalarize,
-                  (VF.Min *
+                  (VF.getKnownMinValue() *
                    getMemoryInstructionCost(Member, ElementCount::getFixed(1))));
           }
         }
@@ -6515,12 +6538,14 @@
 
     if (ScalarPredicatedBB) {
       // Return cost for branches around scalarized and predicated blocks.
-      assert(!VF.Scalable && "scalable vectors not yet supported.");
+      assert(!VF.isScalable() && "scalable vectors not yet supported.");
       auto *Vec_i1Ty =
           VectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF);
       return (TTI.getScalarizationOverhead(
-                  Vec_i1Ty, APInt::getAllOnesValue(VF.Min), false, true) +
-              (TTI.getCFInstrCost(Instruction::Br, CostKind) * VF.Min));
+                  Vec_i1Ty, APInt::getAllOnesValue(VF.getKnownMinValue()),
+                  false, true) +
+              (TTI.getCFInstrCost(Instruction::Br, CostKind) *
+               VF.getKnownMinValue()));
     } else if (I->getParent() == TheLoop->getLoopLatch() || VF.isScalar())
       // The back-edge branch will remain, as will all scalar branches.
       return TTI.getCFInstrCost(Instruction::Br, CostKind);
@@ -6537,9 +6562,9 @@
     // First-order recurrences are replaced by vector shuffles inside the loop.
     // NOTE: Don't use ToVectorTy as SK_ExtractSubvector expects a vector type.
     if (VF.isVector() && Legal->isFirstOrderRecurrence(Phi))
-      return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
-                                cast(VectorTy), VF.Min - 1,
-                                FixedVectorType::get(RetTy, 1));
+      return TTI.getShuffleCost(
+          TargetTransformInfo::SK_ExtractSubvector, cast(VectorTy),
+          VF.getKnownMinValue() - 1, FixedVectorType::get(RetTy, 1));
 
     // Phi nodes in non-header blocks (not inductions, reductions, etc.) are
     // converted into select instructions. We require N - 1 selects per phi
@@ -6568,11 +6593,12 @@
       // that we will create. This cost is likely to be zero. The phi node
       // cost, if any, should be scaled by the block probability because it
      // models a copy at the end of each predicated block.
-      Cost += VF.Min * TTI.getCFInstrCost(Instruction::PHI, CostKind);
+      Cost += VF.getKnownMinValue() *
+              TTI.getCFInstrCost(Instruction::PHI, CostKind);
 
       // The cost of the non-predicated instruction.
-      Cost +=
-          VF.Min * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy, CostKind);
+      Cost += VF.getKnownMinValue() *
+              TTI.getArithmeticInstrCost(I->getOpcode(), RetTy, CostKind);
 
       // The cost of insertelement and extractelement instructions needed for
       // scalarization.
@@ -6611,15 +6637,15 @@
       Op2VK = TargetTransformInfo::OK_UniformValue;
 
     SmallVector Operands(I->operand_values());
-    unsigned N = isScalarAfterVectorization(I, VF) ? VF.Min : 1;
+    unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1;
     return N * TTI.getArithmeticInstrCost(
                    I->getOpcode(), VectorTy, CostKind,
                    TargetTransformInfo::OK_AnyValue,
                    Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I);
   }
   case Instruction::FNeg: {
-    assert(!VF.Scalable && "VF is assumed to be non scalable.");
-    unsigned N = isScalarAfterVectorization(I, VF) ? VF.Min : 1;
+    assert(!VF.isScalable() && "VF is assumed to be non scalable.");
+    unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1;
     return N * TTI.getArithmeticInstrCost(
                    I->getOpcode(), VectorTy, CostKind,
                    TargetTransformInfo::OK_AnyValue,
@@ -6633,7 +6659,7 @@
     bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
     Type *CondTy = SI->getCondition()->getType();
     if (!ScalarCond) {
-      assert(!VF.Scalable && "VF is assumed to be non scalable.");
+      assert(!VF.isScalable() && "VF is assumed to be non scalable.");
       CondTy = VectorType::get(CondTy, VF);
     }
     return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy,
@@ -6745,8 +6771,8 @@
       }
     }
 
-    assert(!VF.Scalable && "VF is assumed to be non scalable");
-    unsigned N = isScalarAfterVectorization(I, VF) ? VF.Min : 1;
+    assert(!VF.isScalable() && "VF is assumed to be non scalable");
+    unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1;
     return N *
            TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I);
   }
@@ -6761,9 +6787,8 @@
   default:
     // The cost of executing VF copies of the scalar instruction. This opcode
     // is unknown. Assume that it is the same as 'mul'.
-    return VF.Min *
-               TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy,
-                                          CostKind) +
+    return VF.getKnownMinValue() * TTI.getArithmeticInstrCost(
+                                       Instruction::Mul, VectorTy, CostKind) +
            getScalarizationOverhead(I, VF);
   } // end of switch.
 }
@@ -6870,7 +6895,7 @@
 
 VectorizationFactor
 LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
-  assert(!UserVF.Scalable && "scalable vectors not yet supported");
+  assert(!UserVF.isScalable() && "scalable vectors not yet supported");
   ElementCount VF = UserVF;
   // Outer loop handling: They may require CFG and instruction level
   // transformations before even evaluating whether vectorization is profitable.
@@ -6892,10 +6917,11 @@
     }
   }
   assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
-  assert(isPowerOf2_32(VF.Min) && "VF needs to be a power of two");
+  assert(isPowerOf2_32(VF.getKnownMinValue()) &&
+         "VF needs to be a power of two");
   LLVM_DEBUG(dbgs() << "LV: Using " << (!UserVF.isZero() ? "user " : "")
                     << "VF " << VF << " to build VPlans.\n");
-  buildVPlans(VF.Min, VF.Min);
+  buildVPlans(VF.getKnownMinValue(), VF.getKnownMinValue());
 
   // For VPlan build stress testing, we bail out after VPlan construction.
   if (VPlanBuildStressTest)
@@ -6912,9 +6938,10 @@
 
 Optional
 LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
-  assert(!UserVF.Scalable && "scalable vectorization not yet handled");
+  assert(!UserVF.isScalable() && "scalable vectorization not yet handled");
   assert(OrigLoop->empty() && "Inner loop expected.");
-  Optional MaybeMaxVF = CM.computeMaxVF(UserVF.Min, UserIC);
+  Optional MaybeMaxVF =
+      CM.computeMaxVF(UserVF.getKnownMinValue(), UserIC);
   if (!MaybeMaxVF) // Cases that should not to be vectorized nor interleaved.
     return None;
@@ -6934,12 +6961,14 @@
 
   if (!UserVF.isZero()) {
     LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
-    assert(isPowerOf2_32(UserVF.Min) && "VF needs to be a power of two");
+    assert(isPowerOf2_32(UserVF.getKnownMinValue()) &&
+           "VF needs to be a power of two");
     // Collect the instructions (and their associated costs) that will be more
     // profitable to scalarize.
     CM.selectUserVectorizationFactor(UserVF);
     CM.collectInLoopReductions();
-    buildVPlansWithVPRecipes(UserVF.Min, UserVF.Min);
+    buildVPlansWithVPRecipes(UserVF.getKnownMinValue(),
+                             UserVF.getKnownMinValue());
     LLVM_DEBUG(printPlans(dbgs()));
     return {{UserVF, 0}};
   }
@@ -7228,7 +7257,7 @@
          "Must be called with either a load or store");
 
   auto willWiden = [&](ElementCount VF) -> bool {
-    assert(!VF.Scalable && "unexpected scalable ElementCount");
+    assert(!VF.isScalable() && "unexpected scalable ElementCount");
     if (VF.isScalar())
       return false;
     LoopVectorizationCostModel::InstWidening Decision =
@@ -7762,7 +7791,7 @@
   ElementCount VF = ElementCount::getFixed(Range.Start);
   Plan->addVF(VF);
   RSO << "Initial VPlan for VF={" << VF;
-  for (VF.Min *= 2; VF.Min < Range.End; VF.Min *= 2) {
+  for (VF *= 2; VF.getKnownMinValue() < Range.End; VF *= 2) {
     Plan->addVF(VF);
     RSO << "," << VF;
   }
@@ -7986,7 +8015,7 @@
   if (AlsoPack && State.VF.isVector()) {
     // If we're constructing lane 0, initialize to start from undef.
     if (State.Instance->Lane == 0) {
-      assert(!State.VF.Scalable && "VF is assumed to be non scalable.");
+      assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
       Value *Undef =
           UndefValue::get(VectorType::get(Ingredient->getType(), State.VF));
       State.ValueMap.setVectorValue(Ingredient, State.Instance->Part, Undef);
@@ -7999,7 +8028,7 @@
   // Generate scalar instances for all VF lanes of all UF parts, unless the
   // instruction is uniform inwhich case generate only the first lane for each
   // of the UF parts.
-  unsigned EndLane = IsUniform ? 1 : State.VF.Min;
+  unsigned EndLane = IsUniform ? 1 : State.VF.getKnownMinValue();
   for (unsigned Part = 0; Part < State.UF; ++Part)
     for (unsigned Lane = 0; Lane < EndLane; ++Lane)
       State.ILV->scalarizeInstruction(Ingredient, User, {Part, Lane},
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -151,14 +151,15 @@
   /// \return True if the map has a scalar entry for \p Key and \p Instance.
   bool hasScalarValue(Value *Key, const VPIteration &Instance) const {
     assert(Instance.Part < UF && "Queried Scalar Part is too large.");
-    assert(Instance.Lane < VF.Min && "Queried Scalar Lane is too large.");
-    assert(!VF.Scalable && "VF is assumed to be non scalable.");
+    assert(Instance.Lane < VF.getKnownMinValue() &&
+           "Queried Scalar Lane is too large.");
+    assert(!VF.isScalable() && "VF is assumed to be non scalable.");
 
     if (!hasAnyScalarValue(Key))
       return false;
     const ScalarParts &Entry = ScalarMapStorage.find(Key)->second;
     assert(Entry.size() == UF && "ScalarParts has wrong dimensions.");
-    assert(Entry[Instance.Part].size() == VF.Min &&
+    assert(Entry[Instance.Part].size() == VF.getKnownMinValue() &&
            "ScalarParts has wrong dimensions.");
     return Entry[Instance.Part][Instance.Lane] != nullptr;
   }
@@ -197,7 +198,7 @@
       // TODO: Consider storing uniform values only per-part, as they occupy
       // lane 0 only, keeping the other VF-1 redundant entries null.
       for (unsigned Part = 0; Part < UF; ++Part)
-        Entry[Part].resize(VF.Min, nullptr);
+        Entry[Part].resize(VF.getKnownMinValue(), nullptr);
       ScalarMapStorage[Key] = Entry;
     }
     ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -300,8 +300,9 @@
   for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
     State->Instance->Part = Part;
-    assert(!State->VF.Scalable && "VF is assumed to be non scalable.");
-    for (unsigned Lane = 0, VF = State->VF.Min; Lane < VF; ++Lane) {
+    assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
+    for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
+         ++Lane) {
       State->Instance->Lane = Lane;
       // Visit the VPBlocks connected to \p this, starting from it.
       for (VPBlockBase *Block : RPOT) {
@@ -388,7 +389,7 @@
     Value *ScalarTC = State.TripCount;
 
     auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
-    auto *PredTy = FixedVectorType::get(Int1Ty, State.VF.Min);
+    auto *PredTy = FixedVectorType::get(Int1Ty, State.VF.getKnownMinValue());
     Instruction *Call = Builder.CreateIntrinsic(
         Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
         {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
@@ -840,14 +841,16 @@
   Type *STy = CanonicalIV->getType();
   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
   ElementCount VF = State.VF;
-  assert(!VF.Scalable && "the code following assumes non scalables ECs");
-  Value *VStart = VF.isScalar() ? CanonicalIV
-                                : Builder.CreateVectorSplat(VF.Min, CanonicalIV,
-                                                            "broadcast");
+  assert(!VF.isScalable() && "the code following assumes non scalables ECs");
+  Value *VStart = VF.isScalar()
+                      ? CanonicalIV
+                      : Builder.CreateVectorSplat(VF.getKnownMinValue(),
+                                                  CanonicalIV, "broadcast");
   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
     SmallVector Indices;
-    for (unsigned Lane = 0; Lane < VF.Min; ++Lane)
-      Indices.push_back(ConstantInt::get(STy, Part * VF.Min + Lane));
+    for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
+      Indices.push_back(
+          ConstantInt::get(STy, Part * VF.getKnownMinValue() + Lane));
     // If VF == 1, there is only one iteration in the loop above, thus the
     // element pushed back into Indices is ConstantInt::get(STy, Part)
     Constant *VStep = VF == 1 ? Indices.back() : ConstantVector::get(Indices);
diff --git a/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp b/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp
--- a/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp
+++ b/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp
@@ -71,8 +71,8 @@
 
   // Check fields inside llvm::ElementCount
   EltCnt = Vnx4i32.getVectorElementCount();
-  EXPECT_EQ(EltCnt.Min, 4U);
-  ASSERT_TRUE(EltCnt.Scalable);
+  EXPECT_EQ(EltCnt.getKnownMinValue(), 4U);
+  ASSERT_TRUE(EltCnt.isScalable());
 
   // Check that fixed-length vector types aren't scalable.
   EVT V8i32 = EVT::getVectorVT(Ctx, MVT::i32, 8);
@@ -82,8 +82,8 @@
 
   // Check that llvm::ElementCount works for fixed-length types.
   EltCnt = V8i32.getVectorElementCount();
-  EXPECT_EQ(EltCnt.Min, 8U);
-  ASSERT_FALSE(EltCnt.Scalable);
+  EXPECT_EQ(EltCnt.getKnownMinValue(), 8U);
+  ASSERT_FALSE(EltCnt.isScalable());
 }
 
 TEST(ScalableVectorMVTsTest, IRToVTTranslation) {
diff --git a/llvm/unittests/IR/VectorTypesTest.cpp b/llvm/unittests/IR/VectorTypesTest.cpp
--- a/llvm/unittests/IR/VectorTypesTest.cpp
+++ b/llvm/unittests/IR/VectorTypesTest.cpp
@@ -119,8 +119,8 @@
   EXPECT_EQ(ConvTy->getElementType()->getScalarSizeInBits(), 64U);
 
   EltCnt = V8Int64Ty->getElementCount();
-  EXPECT_EQ(EltCnt.Min, 8U);
-  ASSERT_FALSE(EltCnt.Scalable);
+  EXPECT_EQ(EltCnt.getKnownMinValue(), 8U);
+  ASSERT_FALSE(EltCnt.isScalable());
 }
 
 TEST(VectorTypesTest, Scalable) {
@@ -215,8 +215,8 @@
   EXPECT_EQ(ConvTy->getElementType()->getScalarSizeInBits(), 64U);
 
   EltCnt = ScV8Int64Ty->getElementCount();
-  EXPECT_EQ(EltCnt.Min, 8U);
-  ASSERT_TRUE(EltCnt.Scalable);
+  EXPECT_EQ(EltCnt.getKnownMinValue(), 8U);
+  ASSERT_TRUE(EltCnt.isScalable());
 }
 
 TEST(VectorTypesTest, BaseVectorType) {
@@ -250,7 +250,7 @@
     // test I == J
     VectorType *VI = VTys[I];
     ElementCount ECI = VI->getElementCount();
-    EXPECT_EQ(isa(VI), ECI.Scalable);
+    EXPECT_EQ(isa(VI), ECI.isScalable());
 
     for (size_t J = I + 1, JEnd = VTys.size(); J < JEnd; ++J) {
       // test I < J