diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -1027,8 +1027,8 @@ unsigned EltSzInBytes = Sz / 8; unsigned SzInBytes = EltSzInBytes * ChainSize; - VectorType *VecTy; - VectorType *VecStoreTy = dyn_cast(StoreTy); + FixedVectorType *VecTy; + auto *VecStoreTy = dyn_cast(StoreTy); if (VecStoreTy) VecTy = FixedVectorType::get(StoreTy->getScalarType(), Chain.size() * VecStoreTy->getNumElements()); @@ -1180,7 +1180,7 @@ unsigned EltSzInBytes = Sz / 8; unsigned SzInBytes = EltSzInBytes * ChainSize; VectorType *VecTy; - VectorType *VecLoadTy = dyn_cast(LoadTy); + auto *VecLoadTy = dyn_cast(LoadTy); if (VecLoadTy) VecTy = FixedVectorType::get(LoadTy->getScalarType(), Chain.size() * VecLoadTy->getNumElements()); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2012,7 +2012,7 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step, Instruction::BinaryOps BinOp) { // Create and check the types. - auto *ValVTy = cast(Val->getType()); + auto *ValVTy = cast(Val->getType()); int VLen = ValVTy->getNumElements(); Type *STy = Val->getType()->getScalarType(); @@ -2776,19 +2776,18 @@ Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V, VectorType *DstVTy, const DataLayout &DL) { // Verify that V is a vector type with same number of elements as DstVTy. - assert(isa(DstVTy) && - "Vector type is assumed to be fixed width."); - unsigned VF = DstVTy->getNumElements(); - VectorType *SrcVecTy = cast(V->getType()); + auto *DstFVTy = cast(DstVTy); + unsigned VF = DstFVTy->getNumElements(); + auto *SrcVecTy = cast(V->getType()); assert((VF == SrcVecTy->getNumElements()) && "Vector dimensions do not match"); Type *SrcElemTy = SrcVecTy->getElementType(); - Type *DstElemTy = DstVTy->getElementType(); + Type *DstElemTy = DstFVTy->getElementType(); assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) && "Vector elements must have same size"); // Do a direct cast if element types are castable. if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) { - return Builder.CreateBitOrPointerCast(V, DstVTy); + return Builder.CreateBitOrPointerCast(V, DstFVTy); } // V cannot be directly casted to desired vector type. // May happen when V is a floating point vector but DstVTy is a vector of @@ -2802,7 +2801,7 @@ IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy)); auto *VecIntTy = FixedVectorType::get(IntTy, VF); Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy); - return Builder.CreateBitOrPointerCast(CastVal, DstVTy); + return Builder.CreateBitOrPointerCast(CastVal, DstFVTy); } void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L, @@ -3503,7 +3502,8 @@ Type *ScalarTruncatedTy = IntegerType::get(OriginalTy->getContext(), KV.second); auto *TruncatedTy = FixedVectorType::get( - ScalarTruncatedTy, cast(OriginalTy)->getNumElements()); + ScalarTruncatedTy, + cast(OriginalTy)->getNumElements()); if (TruncatedTy == OriginalTy) continue; @@ -3553,13 +3553,13 @@ break; } } else if (auto *SI = dyn_cast(I)) { - auto Elements0 = - cast(SI->getOperand(0)->getType())->getNumElements(); + auto Elements0 = cast(SI->getOperand(0)->getType()) + ->getNumElements(); auto *O0 = B.CreateZExtOrTrunc( SI->getOperand(0), FixedVectorType::get(ScalarTruncatedTy, Elements0)); - auto Elements1 = - cast(SI->getOperand(1)->getType())->getNumElements(); + auto Elements1 = cast(SI->getOperand(1)->getType()) + ->getNumElements(); auto *O1 = B.CreateZExtOrTrunc( SI->getOperand(1), FixedVectorType::get(ScalarTruncatedTy, Elements1)); @@ -3569,16 +3569,16 @@ // Don't do anything with the operands, just extend the result. continue; } else if (auto *IE = dyn_cast(I)) { - auto Elements = - cast(IE->getOperand(0)->getType())->getNumElements(); + auto Elements = cast(IE->getOperand(0)->getType()) + ->getNumElements(); auto *O0 = B.CreateZExtOrTrunc( IE->getOperand(0), FixedVectorType::get(ScalarTruncatedTy, Elements)); auto *O1 = B.CreateZExtOrTrunc(IE->getOperand(1), ScalarTruncatedTy); NewI = B.CreateInsertElement(O0, O1, IE->getOperand(2)); } else if (auto *EE = dyn_cast(I)) { - auto Elements = - cast(EE->getOperand(0)->getType())->getNumElements(); + auto Elements = cast(EE->getOperand(0)->getType()) + ->getNumElements(); auto *O0 = B.CreateZExtOrTrunc( EE->getOperand(0), FixedVectorType::get(ScalarTruncatedTy, Elements)); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -286,7 +286,8 @@ static Optional isShuffle(ArrayRef VL) { auto *EI0 = cast(VL[0]); - unsigned Size = EI0->getVectorOperandType()->getNumElements(); + unsigned Size = + cast(EI0->getVectorOperandType())->getNumElements(); Value *Vec1 = nullptr; Value *Vec2 = nullptr; enum ShuffleMode { Unknown, Select, Permute }; @@ -295,7 +296,7 @@ auto *EI = cast(VL[I]); auto *Vec = EI->getVectorOperand(); // All vector operands must have the same number of vector elements. - if (cast(Vec->getType())->getNumElements() != Size) + if (cast(Vec->getType())->getNumElements() != Size) return None; auto *Idx = dyn_cast(EI->getIndexOperand()); if (!Idx) @@ -1411,7 +1412,7 @@ /// \returns the scalarization cost for this type. Scalarization in this /// context means the creation of vectors from a group of scalars. - int getGatherCost(VectorType *Ty, + int getGatherCost(FixedVectorType *Ty, const DenseSet &ShuffledIndices) const; /// \returns the scalarization cost for this list of values. Assuming that @@ -1424,7 +1425,7 @@ void setInsertPointAfterBundle(TreeEntry *E); /// \returns a vector from a collection of scalars in \p VL. - Value *Gather(ArrayRef VL, VectorType *Ty); + Value *Gather(ArrayRef VL, FixedVectorType *Ty); /// \returns whether the VectorizableTree is fully vectorizable and will /// be beneficial even the tree height is tiny. @@ -3166,7 +3167,7 @@ N *= AT->getNumElements(); EltTy = AT->getElementType(); } else { - auto *VT = cast(EltTy); + auto *VT = cast(EltTy); N *= VT->getNumElements(); EltTy = VT->getElementType(); } @@ -3204,7 +3205,7 @@ if (!LI || !LI->isSimple() || !LI->hasNUses(VL.size())) return false; } else { - NElts = cast(Vec->getType())->getNumElements(); + NElts = cast(Vec->getType())->getNumElements(); } if (NElts != VL.size()) @@ -3255,8 +3256,8 @@ } static std::pair -getVectorCallCosts(CallInst *CI, VectorType *VecTy, TargetTransformInfo *TTI, - TargetLibraryInfo *TLI) { +getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy, + TargetTransformInfo *TTI, TargetLibraryInfo *TLI) { Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); // Calculate the cost of the scalar and vector calls. @@ -3928,7 +3929,7 @@ return Cost; } -int BoUpSLP::getGatherCost(VectorType *Ty, +int BoUpSLP::getGatherCost(FixedVectorType *Ty, const DenseSet &ShuffledIndices) const { unsigned NumElts = Ty->getNumElements(); APInt DemandedElts = APInt::getNullValue(NumElts); @@ -4041,7 +4042,7 @@ Builder.SetCurrentDebugLocation(Front->getDebugLoc()); } -Value *BoUpSLP::Gather(ArrayRef VL, VectorType *Ty) { +Value *BoUpSLP::Gather(ArrayRef VL, FixedVectorType *Ty) { Value *Vec = UndefValue::get(Ty); // Generate the 'InsertElement' instruction. for (unsigned i = 0; i < Ty->getNumElements(); ++i) { diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -437,8 +437,10 @@ TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy)) return false; - unsigned DestNumElts = DestTy->getNumElements(); - unsigned SrcNumElts = SrcTy->getNumElements(); + // FIXME: it should be possible to implement the computation of the widened + // shuffle mask in terms of ElementCount to work with scalable shuffles. + unsigned DestNumElts = cast(DestTy)->getNumElements(); + unsigned SrcNumElts = cast(SrcTy)->getNumElements(); SmallVector NewMask; if (SrcNumElts <= DestNumElts) { // The bitcast is from wide to narrow/equal elements. The shuffle mask can