diff --git a/llvm/lib/Target/X86/X86InterleavedAccess.cpp b/llvm/lib/Target/X86/X86InterleavedAccess.cpp
--- a/llvm/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/llvm/lib/Target/X86/X86InterleavedAccess.cpp
@@ -69,7 +69,7 @@
 
   /// Breaks down a vector \p 'Inst' of N elements into \p NumSubVectors
   /// sub vectors of type \p T. Returns the sub-vectors in \p DecomposedVectors.
-  void decompose(Instruction *Inst, unsigned NumSubVectors, VectorType *T,
+  void decompose(Instruction *Inst, unsigned NumSubVectors, FixedVectorType *T,
                  SmallVectorImpl<Instruction *> &DecomposedVectors);
 
   /// Performs matrix transposition on a 4x4 matrix \p InputVectors and
@@ -165,7 +165,7 @@
 }
 
 void X86InterleavedAccessGroup::decompose(
-    Instruction *VecInst, unsigned NumSubVectors, VectorType *SubVecTy,
+    Instruction *VecInst, unsigned NumSubVectors, FixedVectorType *SubVecTy,
     SmallVectorImpl<Instruction *> &DecomposedVectors) {
   assert((isa<LoadInst>(VecInst) || isa<ShuffleVectorInst>(VecInst)) &&
          "Expected Load or Shuffle");
@@ -727,13 +727,13 @@
 bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() {
   SmallVector<Instruction *, 4> DecomposedVectors;
   SmallVector<Value *, 4> TransposedVectors;
-  VectorType *ShuffleTy = Shuffles[0]->getType();
+  auto *ShuffleTy = cast<FixedVectorType>(Shuffles[0]->getType());
 
   if (isa<LoadInst>(Inst)) {
     // Try to generate target-sized register(/instruction).
     decompose(Inst, Factor, ShuffleTy, DecomposedVectors);
 
-    auto *ShuffleEltTy = cast<VectorType>(Inst->getType());
+    auto *ShuffleEltTy = cast<FixedVectorType>(Inst->getType());
     unsigned NumSubVecElems = ShuffleEltTy->getNumElements() / Factor;
     // Perform matrix-transposition in order to compute interleaved
     // results by generating some sort of (optimized) target-specific
@@ -832,7 +832,8 @@
   assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
          "Invalid interleave factor");
 
-  assert(SVI->getType()->getNumElements() % Factor == 0 &&
+  assert(cast<FixedVectorType>(SVI->getType())->getNumElements() % Factor ==
+             0 &&
          "Invalid interleaved store");
 
   // Holds the indices of SVI that correspond to the starting index of each
diff --git a/llvm/lib/Target/X86/X86PartialReduction.cpp b/llvm/lib/Target/X86/X86PartialReduction.cpp
--- a/llvm/lib/Target/X86/X86PartialReduction.cpp
+++ b/llvm/lib/Target/X86/X86PartialReduction.cpp
@@ -68,7 +68,7 @@
     return false;
 
   // Need at least 8 elements.
-  if (cast<VectorType>(Op->getType())->getNumElements() < 8)
+  if (cast<FixedVectorType>(Op->getType())->getNumElements() < 8)
     return false;
 
   // Element type should be i32.
@@ -136,7 +136,7 @@
 
   IRBuilder<> Builder(Mul);
 
-  auto *MulTy = cast<VectorType>(Op->getType());
+  auto *MulTy = cast<FixedVectorType>(Op->getType());
   unsigned NumElts = MulTy->getNumElements();
 
   // Extract even elements and odd elements and add them together. This will
@@ -211,7 +211,7 @@
 
   IRBuilder<> Builder(SI);
 
-  auto *OpTy = cast<VectorType>(Op->getType());
+  auto *OpTy = cast<FixedVectorType>(Op->getType());
   unsigned NumElts = OpTy->getNumElements();
 
   unsigned IntrinsicNumElts;
@@ -265,7 +265,7 @@
   unsigned Stages = Log2_32(NumSplits);
   for (unsigned s = Stages; s > 0; --s) {
     unsigned NumConcatElts =
-        cast<VectorType>(Ops[0]->getType())->getNumElements() * 2;
+        cast<FixedVectorType>(Ops[0]->getType())->getNumElements() * 2;
     for (unsigned i = 0; i != 1U << (s - 1); ++i) {
       SmallVector<int, 64> ConcatMask(NumConcatElts);
       std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
@@ -275,13 +275,14 @@
 
   // At this point the final value should be in Ops[0]. Now we need to adjust
   // it to the final original type.
-  NumElts = cast<VectorType>(OpTy)->getNumElements();
+  NumElts = cast<FixedVectorType>(OpTy)->getNumElements();
   if (NumElts == 2) {
     // Extract down to 2 elements.
     Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef<int>{0, 1});
   } else if (NumElts >= 8) {
     SmallVector<int, 32> ConcatMask(NumElts);
-    unsigned SubElts = cast<VectorType>(Ops[0]->getType())->getNumElements();
+    unsigned SubElts =
+        cast<FixedVectorType>(Ops[0]->getType())->getNumElements();
     for (unsigned i = 0; i != SubElts; ++i)
       ConcatMask[i] = i;
     for (unsigned i = SubElts; i != NumElts; ++i)
@@ -309,7 +310,7 @@
   if (!BO || BO->getOpcode() != Instruction::Add || !BO->hasOneUse())
     return nullptr;
 
-  unsigned NumElems = cast<VectorType>(BO->getType())->getNumElements();
+  unsigned NumElems = cast<FixedVectorType>(BO->getType())->getNumElements();
   // Ensure the reduction size is a power of 2.
   if (!isPowerOf2_32(NumElems))
     return nullptr;
diff --git a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
--- a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -36,7 +36,7 @@
   //
   //   <4 x i32>
 
-  auto *CstTy = dyn_cast<VectorType>(C->getType());
+  auto *CstTy = dyn_cast<FixedVectorType>(C->getType());
   if (!CstTy)
     return false;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -174,13 +174,13 @@
       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
       bool UseMaskForCond = false, bool UseMaskForGaps = false);
   int getInterleavedMemoryOpCostAVX512(
-      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
-      Align Alignment, unsigned AddressSpace,
+      unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
+      ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
       bool UseMaskForCond = false, bool UseMaskForGaps = false);
   int getInterleavedMemoryOpCostAVX2(
-      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
-      Align Alignment, unsigned AddressSpace,
+      unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
+      ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
       bool UseMaskForCond = false, bool UseMaskForGaps = false);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -973,7 +973,7 @@
       // FIXME: Remove some of the alignment restrictions.
       // FIXME: We can use permq for 64-bit or larger extracts from 256-bit
      // vectors.
-      int OrigSubElts = cast<VectorType>(SubTp)->getNumElements();
+      int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
       if (NumSubElts > OrigSubElts && (Index % OrigSubElts) == 0 &&
          (NumSubElts % OrigSubElts) == 0 &&
          LT.second.getVectorElementType() ==
@@ -1047,7 +1047,8 @@
   if (LegalVT.isVector() &&
       LegalVT.getVectorElementType().getSizeInBits() ==
           BaseTp->getElementType()->getPrimitiveSizeInBits() &&
-      LegalVT.getVectorNumElements() < BaseTp->getNumElements()) {
+      LegalVT.getVectorNumElements() <
+          cast<FixedVectorType>(BaseTp)->getNumElements()) {
 
     unsigned VecTySize = DL.getTypeStoreSize(BaseTp);
     unsigned LegalVTSize = LegalVT.getStoreSize();
@@ -2935,7 +2936,8 @@
       // 128-bit vector is free.
       // NOTE: This assumes legalization widens vXf32 vectors.
       if (MScalarTy == MVT::f32)
-        for (unsigned i = 0, e = Ty->getNumElements(); i < e; i += 4)
+        for (unsigned i = 0, e = cast<FixedVectorType>(Ty)->getNumElements();
+             i < e; i += 4)
           if (DemandedElts[i])
             Cost--;
     }
@@ -2951,7 +2953,8 @@
       // vector elements, which represents the number of unpacks we'll end up
       // performing.
       unsigned NumElts = LT.second.getVectorNumElements();
-      unsigned Pow2Elts = PowerOf2Ceil(Ty->getNumElements());
+      unsigned Pow2Elts =
+          PowerOf2Ceil(cast<FixedVectorType>(Ty)->getNumElements());
       Cost += (std::min(NumElts, Pow2Elts) - 1) * LT.first;
     }
   }
@@ -2983,7 +2986,7 @@
   }
 
   // Handle non-power-of-two vectors such as <3 x float>
-  if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
+  if (auto *VTy = dyn_cast<FixedVectorType>(Src)) {
     unsigned NumElem = VTy->getNumElements();
 
     // Handle a few common cases:
@@ -3036,7 +3039,7 @@
   bool IsLoad = (Instruction::Load == Opcode);
   bool IsStore = (Instruction::Store == Opcode);
 
-  VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
+  auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy);
   if (!SrcVTy)
     // To calculate scalar take the regular cost, without mask
     return getMemoryOpCost(Opcode, SrcTy, Alignment, AddressSpace, CostKind);
@@ -3181,7 +3184,7 @@
 
   MVT MTy = LT.second;
 
-  auto *ValVTy = cast<VectorType>(ValTy);
+  auto *ValVTy = cast<FixedVectorType>(ValTy);
 
   unsigned ArithmeticCost = 0;
   if (LT.first != 1 && MTy.isVector() &&
@@ -3562,7 +3565,7 @@
       return Entry->Cost;
   }
 
-  auto *ValVTy = cast<VectorType>(ValTy);
+  auto *ValVTy = cast<FixedVectorType>(ValTy);
   unsigned NumVecElts = ValVTy->getNumElements();
 
   auto *Ty = ValVTy;
@@ -3850,7 +3853,7 @@
                                 Align Alignment, unsigned AddressSpace) {
 
   assert(isa<VectorType>(SrcVTy) && "Unexpected type in getGSVectorCost");
-  unsigned VF = cast<VectorType>(SrcVTy)->getNumElements();
+  unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
 
   // Try to reduce index size from 64 bit (default for GEP)
   // to 32. It is essential for VF 16. If the index can't be reduced to 32, the
@@ -3921,7 +3924,7 @@
 
 int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
                                 bool VariableMask, Align Alignment,
                                 unsigned AddressSpace) {
-  unsigned VF = cast<VectorType>(SrcVTy)->getNumElements();
+  unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
   APInt DemandedElts = APInt::getAllOnesValue(VF);
   TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
@@ -3969,7 +3972,7 @@
     return 1;
 
   assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter");
-  unsigned VF = cast<VectorType>(SrcVTy)->getNumElements();
+  unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
   PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
   if (!PtrTy && Ptr->getType()->isVectorTy())
     PtrTy = dyn_cast<PointerType>(
@@ -4020,7 +4023,7 @@
 
   // The backend can't handle a single element vector.
   if (isa<VectorType>(DataTy) &&
-      cast<VectorType>(DataTy)->getNumElements() == 1)
+      cast<FixedVectorType>(DataTy)->getNumElements() == 1)
     return false;
 
   Type *ScalarTy = DataTy->getScalarType();
@@ -4085,7 +4088,7 @@
     return false;
 
   // The backend can't handle a single element vector.
-  if (cast<VectorType>(DataTy)->getNumElements() == 1)
+  if (cast<FixedVectorType>(DataTy)->getNumElements() == 1)
     return false;
 
   Type *ScalarTy = cast<VectorType>(DataTy)->getElementType();
@@ -4124,7 +4127,7 @@
   // In this case we can reject non-power-of-2 vectors.
   // We also reject single element vectors as the type legalizer can't
   // scalarize it.
-  if (auto *DataVTy = dyn_cast<VectorType>(DataTy)) {
+  if (auto *DataVTy = dyn_cast<FixedVectorType>(DataTy)) {
     unsigned NumElts = DataVTy->getNumElements();
     if (NumElts == 1 || !isPowerOf2_32(NumElts))
       return false;
@@ -4245,9 +4248,9 @@
 // shuffles. We therefore use a lookup table instead, filled according to
 // the instruction sequences that codegen currently generates.
 int X86TTIImpl::getInterleavedMemoryOpCostAVX2(
-    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
-    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
-    bool UseMaskForCond, bool UseMaskForGaps) {
+    unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
+    ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
+    TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) {
 
   if (UseMaskForCond || UseMaskForGaps)
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
@@ -4274,8 +4277,8 @@
                                              Alignment, AddressSpace,
                                              CostKind);
 
-  unsigned VF = cast<VectorType>(VecTy)->getNumElements() / Factor;
-  Type *ScalarTy = cast<VectorType>(VecTy)->getElementType();
+  unsigned VF = VecTy->getNumElements() / Factor;
+  Type *ScalarTy = VecTy->getElementType();
 
   // Calculate the number of memory operations (NumOfMemOps), required
   // for load/store the VecTy.
@@ -4284,9 +4287,8 @@
   unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
 
   // Get the cost of one memory operation.
-  auto *SingleMemOpTy =
-      FixedVectorType::get(cast<VectorType>(VecTy)->getElementType(),
-                           LegalVT.getVectorNumElements());
+  auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
+                                             LegalVT.getVectorNumElements());
   unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
                                        MaybeAlign(Alignment), AddressSpace,
                                        CostKind);
@@ -4363,9 +4365,9 @@
 // \p Factor - the factor of interleaving.
 // AVX-512 provides 3-src shuffles that significantly reduces the cost.
 int X86TTIImpl::getInterleavedMemoryOpCostAVX512(
-    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
-    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
-    bool UseMaskForCond, bool UseMaskForGaps) {
+    unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
+    ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
+    TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) {
 
   if (UseMaskForCond || UseMaskForGaps)
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
@@ -4384,14 +4386,13 @@
   unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
 
   // Get the cost of one memory operation.
-  auto *SingleMemOpTy =
-      FixedVectorType::get(cast<VectorType>(VecTy)->getElementType(),
-                           LegalVT.getVectorNumElements());
+  auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
+                                             LegalVT.getVectorNumElements());
   unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
                                        MaybeAlign(Alignment), AddressSpace,
                                        CostKind);
 
-  unsigned VF = cast<VectorType>(VecTy)->getNumElements() / Factor;
+  unsigned VF = VecTy->getNumElements() / Factor;
   MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
 
   if (Opcode == Instruction::Load) {
@@ -4423,9 +4424,8 @@
     unsigned NumOfLoadsInInterleaveGrp =
         Indices.size() ? Indices.size() : Factor;
-    auto *ResultTy = FixedVectorType::get(
-        cast<VectorType>(VecTy)->getElementType(),
-        cast<VectorType>(VecTy)->getNumElements() / Factor);
+    auto *ResultTy = FixedVectorType::get(VecTy->getElementType(),
+                                          VecTy->getNumElements() / Factor);
     unsigned NumOfResults =
         getTLI()->getTypeLegalizationCost(DL, ResultTy).first *
         NumOfLoadsInInterleaveGrp;
 
@@ -4501,13 +4501,13 @@
     return false;
   };
   if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI()))
-    return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices,
-                                            Alignment, AddressSpace, CostKind,
-                                            UseMaskForCond, UseMaskForGaps);
+    return getInterleavedMemoryOpCostAVX512(
+        Opcode, cast<FixedVectorType>(VecTy), Factor, Indices, Alignment,
+        AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps);
   if (ST->hasAVX2())
-    return getInterleavedMemoryOpCostAVX2(Opcode, VecTy, Factor, Indices,
-                                          Alignment, AddressSpace, CostKind,
-                                          UseMaskForCond, UseMaskForGaps);
+    return getInterleavedMemoryOpCostAVX2(
+        Opcode, cast<FixedVectorType>(VecTy), Factor, Indices, Alignment,
+        AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps);
 
   return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                            Alignment, AddressSpace, CostKind,
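
Note on the recurring pattern: every hunk above replaces a VectorType-based element-count query with a cast to FixedVectorType, whose getNumElements() is only meaningful for fixed-width vectors, so scalable vectors cannot silently reach these X86-specific element-count computations. A minimal sketch of that idiom follows; the helper name getFixedNumElements is illustrative only and is not part of the patch.

// Sketch of the idiom the patch applies (helper name is illustrative only).
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

// Returns the element count for fixed-width vector types and 0 otherwise.
// dyn_cast<FixedVectorType> yields null for scalable vectors, so the count
// is never queried on a type where it is not a plain compile-time integer.
static unsigned getFixedNumElements(Type *Ty) {
  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty))
    return FVTy->getNumElements();
  return 0;
}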