diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1079,6 +1079,12 @@ (all_of(VL, isUsedOutsideBlock) || all_of(VL, areAllOperandsNonInsts)); } +static FixedVectorType *makeScalarTyToVectorTy(Type *ScalarTy, unsigned VF) { + assert(isValidElementType(ScalarTy) && + "ScalarTy is not a valid element type."); + return FixedVectorType::get(ScalarTy, VF); +} + namespace slpvectorizer { /// Bottom Up SLP Vectorizer. @@ -1428,7 +1434,7 @@ if (getUnderlyingObject(LI1->getPointerOperand()) == getUnderlyingObject(LI2->getPointerOperand()) && R.TTI->isLegalMaskedGather( - FixedVectorType::get(LI1->getType(), NumLanes), + makeScalarTyToVectorTy(LI1->getType(), NumLanes), LI1->getAlign())) return LookAheadHeuristics::ScoreMaskedGatherCandidate; return LookAheadHeuristics::ScoreFail; @@ -3920,7 +3926,7 @@ for (Value *V : VL) CommonAlignment = std::min(CommonAlignment, cast(V)->getAlign()); - auto *VecTy = FixedVectorType::get(ScalarTy, VL.size()); + FixedVectorType *VecTy = makeScalarTyToVectorTy(ScalarTy, VL.size()); if (TTI.isLegalMaskedGather(VecTy, CommonAlignment) && !TTI.forceScalarizeMaskedGather(VecTy, CommonAlignment)) return LoadsState::ScatterVectorize; @@ -4221,7 +4227,8 @@ find_if(TE.Scalars, [](Value *V) { return !isConstant(V); }); if (It == TE.Scalars.begin()) return OrdersType(); - auto *Ty = FixedVectorType::get(TE.Scalars.front()->getType(), Sz); + FixedVectorType *Ty = + makeScalarTyToVectorTy(TE.Scalars.front()->getType(), Sz); if (It != TE.Scalars.end()) { OrdersType Order(Sz, Sz); unsigned Idx = std::distance(TE.Scalars.begin(), It); @@ -4333,8 +4340,8 @@ // x86. Reordering them into [fsub,fadd] blocks this pattern. So we need // to take into account their order when looking for the most used order. if (TE->isAltShuffle()) { - VectorType *VecTy = - FixedVectorType::get(TE->Scalars[0]->getType(), TE->Scalars.size()); + FixedVectorType *VecTy = + makeScalarTyToVectorTy(TE->Scalars[0]->getType(), TE->Scalars.size()); unsigned Opcode0 = TE->getOpcode(); unsigned Opcode1 = TE->getAltOpcode(); // The opcode mask selects between the two opcodes. @@ -6171,7 +6178,7 @@ if (!isValidElementType(EltTy)) return 0; - uint64_t VTSize = DL.getTypeStoreSizeInBits(FixedVectorType::get(EltTy, N)); + uint64_t VTSize = DL.getTypeStoreSizeInBits(makeScalarTyToVectorTy(EltTy, N)); if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize || VTSize != DL.getTypeStoreSizeInBits(T)) return 0; return N; @@ -6277,7 +6284,7 @@ SmallVector VecTys; for (Use &Arg : CI->args()) VecTys.push_back( - FixedVectorType::get(Arg->getType(), VecTy->getNumElements())); + makeScalarTyToVectorTy(Arg->getType(), VecTy->getNumElements())); FastMathFlags FMF; if (auto *FPCI = dyn_cast(CI)) FMF = FPCI->getFastMathFlags(); @@ -6751,7 +6758,8 @@ InstructionCost getBuildVectorCost(ArrayRef VL, Value *Root) { if ((!Root && allConstant(VL)) || all_of(VL, UndefValue::classof)) return TTI::TCC_Free; - auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size()); + FixedVectorType *VecTy = + makeScalarTyToVectorTy(VL.front()->getType(), VL.size()); InstructionCost GatherCost = 0; SmallVector Gathers(VL.begin(), VL.end()); // Improve gather cost for gather of loads, if we can group some of the @@ -6828,7 +6836,7 @@ LI->getAlign(), LI->getPointerAddressSpace(), CostKind, TTI::OperandValueInfo(), LI); } - auto *LoadTy = FixedVectorType::get(LI->getType(), VF); + FixedVectorType *LoadTy = makeScalarTyToVectorTy(LI->getType(), VF); Align Alignment = LI->getAlign(); GatherCost += VectorizedCnt * @@ -6878,7 +6886,8 @@ /// extracted values from \p VL. InstructionCost computeExtractCost(ArrayRef VL, ArrayRef Mask, TTI::ShuffleKind ShuffleKind) { - auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size()); + FixedVectorType *VecTy = + makeScalarTyToVectorTy(VL.front()->getType(), VL.size()); unsigned NumOfParts = TTI.getNumberOfParts(VecTy); if (ShuffleKind != TargetTransformInfo::SK_PermuteSingleSrc || @@ -6930,7 +6939,7 @@ // cost to extract the vector with EltsPerVector elements. Cost += TTI.getShuffleCost( TargetTransformInfo::SK_PermuteSingleSrc, - FixedVectorType::get(VecTy->getElementType(), EltsPerVector), + makeScalarTyToVectorTy(VecTy->getElementType(), EltsPerVector), RegMask); } return Cost; @@ -6959,7 +6968,7 @@ return TTI::TCC_Free; return TTI.getShuffleCost( TTI::SK_PermuteTwoSrc, - FixedVectorType::get( + makeScalarTyToVectorTy( cast(V1->getType())->getElementType(), Mask.size()), Mask); } @@ -6969,7 +6978,7 @@ return TTI::TCC_Free; return TTI.getShuffleCost( TTI::SK_PermuteSingleSrc, - FixedVectorType::get( + makeScalarTyToVectorTy( cast(V1->getType())->getElementType(), Mask.size()), Mask); } @@ -7003,7 +7012,7 @@ CommonVF = VF = E->Scalars.size(); } V1 = Constant::getNullValue( - FixedVectorType::get(E->Scalars.front()->getType(), VF)); + makeScalarTyToVectorTy(E->Scalars.front()->getType(), VF)); } if (!V2 && !P2.isNull()) { const TreeEntry *E = P2.get(); @@ -7014,7 +7023,7 @@ if (CommonVF) VF = CommonVF; V2 = Constant::getNullValue( - FixedVectorType::get(E->Scalars.front()->getType(), VF)); + makeScalarTyToVectorTy(E->Scalars.front()->getType(), VF)); } return BaseShuffleAnalysis::createShuffle(V1, V2, Mask, Builder); @@ -7032,7 +7041,8 @@ return nullptr; Value *VecBase = nullptr; ArrayRef VL = E->Scalars; - auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size()); + FixedVectorType *VecTy = + makeScalarTyToVectorTy(VL.front()->getType(), VL.size()); // If the resulting type is scalarized, do not adjust the cost. unsigned VecNumParts = TTI.getNumberOfParts(VecTy); if (VecNumParts == VecTy->getNumElements()) @@ -7102,8 +7112,8 @@ // Need to round up the subvector type vectorization factor to avoid a // crash in cost model functions. Make SubVT so that Idx + VF of SubVT // <= EENumElts. - auto *SubVT = - FixedVectorType::get(VecTy->getElementType(), EENumElts - Idx); + FixedVectorType *SubVT = + makeScalarTyToVectorTy(VecTy->getElementType(), EENumElts - Idx); Cost += TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector, EEVTy, std::nullopt, CostKind, Idx, SubVT); } @@ -7174,7 +7184,7 @@ "Expected vector length for the final value before action."); Value *V = Vec.dyn_cast(); if (!Vec.isNull() && !V) - V = Constant::getNullValue(FixedVectorType::get( + V = Constant::getNullValue(makeScalarTyToVectorTy( Vec.get()->Scalars.front()->getType(), CommonMask.size())); Action(V, CommonMask); @@ -7210,16 +7220,17 @@ ScalarTy = CI->getOperand(0)->getType(); else if (auto *IE = dyn_cast(VL[0])) ScalarTy = IE->getOperand(1)->getType(); - auto *VecTy = FixedVectorType::get(ScalarTy, VL.size()); + FixedVectorType *VecTy = makeScalarTyToVectorTy(ScalarTy, VL.size()); TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; // If we have computed a smaller type for the expression, update VecTy so // that the costs will be accurate. if (MinBWs.count(VL[0])) - VecTy = FixedVectorType::get( + VecTy = makeScalarTyToVectorTy( IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size()); unsigned EntryVF = E->getVectorFactor(); - auto *FinalVecTy = FixedVectorType::get(VecTy->getElementType(), EntryVF); + FixedVectorType *FinalVecTy = + makeScalarTyToVectorTy(VecTy->getElementType(), EntryVF); bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty(); if (E->State == TreeEntry::NeedToGather) { @@ -7307,10 +7318,10 @@ return Estimator.finalize( E->ReuseShuffleIndices, E->Scalars.size(), [&](Value *&Vec, SmallVectorImpl &Mask) { - Vec = Estimator.gather(GatheredScalars, - Constant::getNullValue(FixedVectorType::get( - GatheredScalars.front()->getType(), - GatheredScalars.size()))); + Vec = Estimator.gather( + GatheredScalars, Constant::getNullValue(makeScalarTyToVectorTy( + GatheredScalars.front()->getType(), + GatheredScalars.size()))); }); } if (!all_of(GatheredScalars, PoisonValue::classof)) { @@ -7318,7 +7329,7 @@ bool SameGathers = VL.equals(Gathers); Value *BV = Estimator.gather( Gathers, SameGathers ? nullptr - : Constant::getNullValue(FixedVectorType::get( + : Constant::getNullValue(makeScalarTyToVectorTy( GatheredScalars.front()->getType(), GatheredScalars.size()))); SmallVector ReuseMask(Gathers.size(), PoisonMaskElem); @@ -7500,7 +7511,7 @@ NumElts = ATy->getNumElements(); else NumElts = AggregateTy->getStructNumElements(); - SrcVecTy = FixedVectorType::get(ScalarTy, NumElts); + SrcVecTy = makeScalarTyToVectorTy(ScalarTy, NumElts); } if (I->hasOneUse()) { Instruction *Ext = I->user_back(); @@ -7595,8 +7606,8 @@ // need to shift the vector. // Do not calculate the cost if the actual size is the register size and // we can merge this shuffle with the following SK_Select. - auto *InsertVecTy = - FixedVectorType::get(SrcVecTy->getElementType(), InsertVecSz); + FixedVectorType *InsertVecTy = + makeScalarTyToVectorTy(SrcVecTy->getElementType(), InsertVecSz); if (!IsIdentity) Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, InsertVecTy, Mask); @@ -7612,8 +7623,8 @@ buildUseMask(NumElts, InsertMask, UseMask::UndefsAsMask)); if (!InMask.all() && NumScalars != NumElts && !IsWholeSubvector) { if (InsertVecSz != VecSz) { - auto *ActualVecTy = - FixedVectorType::get(SrcVecTy->getElementType(), VecSz); + FixedVectorType *ActualVecTy = + makeScalarTyToVectorTy(SrcVecTy->getElementType(), VecSz); Cost += TTI->getShuffleCost(TTI::SK_InsertSubvector, ActualVecTy, std::nullopt, CostKind, OffsetBeg - Offset, InsertVecTy); @@ -7652,7 +7663,7 @@ }; auto GetVectorCost = [=](InstructionCost CommonCost) { Type *SrcTy = VL0->getOperand(0)->getType(); - auto *SrcVecTy = FixedVectorType::get(SrcTy, VL.size()); + FixedVectorType *SrcVecTy = makeScalarTyToVectorTy(SrcTy, VL.size()); InstructionCost VecCost = CommonCost; // Check if the values are candidates to demote. if (!MinBWs.count(VL0) || VecTy != SrcVecTy) @@ -7693,7 +7704,8 @@ VI); }; auto GetVectorCost = [&](InstructionCost CommonCost) { - auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size()); + FixedVectorType *MaskTy = + makeScalarTyToVectorTy(Builder.getInt1Ty(), VL.size()); InstructionCost VecCost = TTI->getCmpSelInstrCost( E->getOpcode(), VecTy, MaskTy, VecPred, CostKind, VL0); @@ -7893,7 +7905,8 @@ VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind); } else if (auto *CI0 = dyn_cast(VL0)) { - auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size()); + FixedVectorType *MaskTy = + makeScalarTyToVectorTy(Builder.getInt1Ty(), VL.size()); VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, CI0->getPredicate(), CostKind, VL0); VecCost += TTI->getCmpSelInstrCost( @@ -7903,8 +7916,8 @@ } else { Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType(); Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType(); - auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size()); - auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size()); + FixedVectorType *Src0Ty = makeScalarTyToVectorTy(Src0SclTy, VL.size()); + FixedVectorType *Src1Ty = makeScalarTyToVectorTy(Src1SclTy, VL.size()); VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty, TTI::CastContextHint::None, CostKind); VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty, @@ -8184,7 +8197,7 @@ auto *ScalarTy = II->getType(); if (auto *VectorTy = dyn_cast(ScalarTy)) ScalarTy = VectorTy->getElementType(); - V.push_back(FixedVectorType::get(ScalarTy, BundleWidth)); + V.push_back(makeScalarTyToVectorTy(ScalarTy, BundleWidth)); } Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(V); } @@ -8480,14 +8493,15 @@ // If we plan to rewrite the tree in a smaller type, we will need to sign // extend the extracted value back to the original type. Here, we account // for the extract and the added cost of the sign extend if needed. - auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth); + FixedVectorType *VecTy = + makeScalarTyToVectorTy(EU.Scalar->getType(), BundleWidth); TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; auto *ScalarRoot = VectorizableTree[0]->Scalars[0]; if (MinBWs.count(ScalarRoot)) { auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first); auto Extend = MinBWs[ScalarRoot].second ? Instruction::SExt : Instruction::ZExt; - VecTy = FixedVectorType::get(MinTy, BundleWidth); + VecTy = makeScalarTyToVectorTy(MinTy, BundleWidth); ExtractCost += TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(), VecTy, EU.Lane); } else { @@ -8513,7 +8527,7 @@ OrigMask.begin()); C = TTI->getShuffleCost( TTI::SK_PermuteSingleSrc, - FixedVectorType::get(TE->getMainOp()->getType(), VecVF), OrigMask); + makeScalarTyToVectorTy(TE->getMainOp()->getType(), VecVF), OrigMask); LLVM_DEBUG( dbgs() << "SLP: Adding cost " << C << " for final shuffle of insertelement external users.\n"; @@ -8527,7 +8541,7 @@ for (int I = 0, E = FirstUsers.size(); I < E; ++I) { Value *Base = cast(FirstUsers[I].first)->getOperand(0); unsigned VF = ShuffleMasks[I].begin()->second.size(); - auto *FTy = FixedVectorType::get( + FixedVectorType *FTy = makeScalarTyToVectorTy( cast(FirstUsers[I].first->getType())->getElementType(), VF); auto Vector = ShuffleMasks[I].takeVector(); auto &&EstimateShufflesCost = [this, FTy, @@ -8932,7 +8946,7 @@ Type *ScalarTy = VL[0]->getType(); if (StoreInst *SI = dyn_cast(VL[0])) ScalarTy = SI->getValueOperand()->getType(); - auto *VecTy = FixedVectorType::get(ScalarTy, VL.size()); + FixedVectorType *VecTy = makeScalarTyToVectorTy(ScalarTy, VL.size()); bool DuplicateNonConst = false; // Find the cost of inserting/extracting values from the vector. // Check if the same elements are inserted several times and count them as @@ -9194,7 +9208,7 @@ }; Value *Val0 = isa(VL[0]) ? cast(VL[0])->getValueOperand() : VL[0]; - FixedVectorType *VecTy = FixedVectorType::get(Val0->getType(), VL.size()); + FixedVectorType *VecTy = makeScalarTyToVectorTy(Val0->getType(), VL.size()); Value *Vec = Root ? Root : PoisonValue::get(VecTy); SmallVector NonConsts; // Insert constant values at first. @@ -9317,7 +9331,7 @@ } Value *createIdentity(Value *V) { return V; } Value *createPoison(Type *Ty, unsigned VF) { - return PoisonValue::get(FixedVectorType::get(Ty, VF)); + return PoisonValue::get(makeScalarTyToVectorTy(Ty, VF)); } /// Resizes 2 input vector to match the sizes, if the they are not equal /// yet. The smallest vector is resized to the size of the larger vector. @@ -9395,8 +9409,8 @@ return nullptr; // Postpone gather emission, will be emitted after the end of the // process to keep correct order. - auto *VecTy = FixedVectorType::get(E->Scalars.front()->getType(), - E->getVectorFactor()); + FixedVectorType *VecTy = makeScalarTyToVectorTy( + E->Scalars.front()->getType(), E->getVectorFactor()); return Builder.CreateAlignedLoad( VecTy, PoisonValue::get(PointerType::getUnqual(VecTy->getContext())), MaybeAlign()); @@ -9912,7 +9926,7 @@ ShuffleBuilder.add(Vec1, ExtractMask); IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1); } else { - ShuffleBuilder.add(PoisonValue::get(FixedVectorType::get( + ShuffleBuilder.add(PoisonValue::get(makeScalarTyToVectorTy( ScalarTy, GatheredScalars.size())), ExtractMask); } @@ -10063,7 +10077,7 @@ ScalarTy = Store->getValueOperand()->getType(); else if (auto *IE = dyn_cast(VL0)) ScalarTy = IE->getOperand(1)->getType(); - auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size()); + FixedVectorType *VecTy = makeScalarTyToVectorTy(ScalarTy, E->Scalars.size()); switch (ShuffleOrOp) { case Instruction::PHI: { assert((E->ReorderIndices.empty() || @@ -10504,7 +10518,7 @@ // Add return type if intrinsic is overloaded on it. if (isVectorIntrinsicWithOverloadTypeAtArg(IID, -1)) TysForDecl.push_back( - FixedVectorType::get(CI->getType(), E->Scalars.size())); + makeScalarTyToVectorTy(CI->getType(), E->Scalars.size())); for (int j = 0, e = CI->arg_size(); j < e; ++j) { ValueList OpVL; // Some intrinsics have scalar arguments. This argument should not be @@ -10738,7 +10752,7 @@ } auto BundleWidth = VectorizableTree[0]->Scalars.size(); auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first); - auto *VecTy = FixedVectorType::get(MinTy, BundleWidth); + FixedVectorType *VecTy = makeScalarTyToVectorTy(MinTy, BundleWidth); auto *Trunc = Builder.CreateTrunc(VectorRoot, VecTy); VectorizableTree[0]->VectorizedValue = Trunc; } @@ -11189,8 +11203,8 @@ return SM1.size() - LastUndefsCnt > 1 && TTI->getNumberOfParts(SI1->getType()) == TTI->getNumberOfParts( - FixedVectorType::get(SI1->getType()->getElementType(), - SM1.size() - LastUndefsCnt)); + makeScalarTyToVectorTy(SI1->getType()->getElementType(), + SM1.size() - LastUndefsCnt)); }; // Perform O(N^2) search over the gather/shuffle sequences and merge identical // instructions. TODO: We can further optimize this scan if we split the @@ -12500,7 +12514,7 @@ // No actual vectorization should happen, if number of parts is the same as // provided vectorization factor (i.e. the scalar type is used for vector // code during codegen). - auto *VecTy = FixedVectorType::get(ScalarTy, VF); + FixedVectorType *VecTy = makeScalarTyToVectorTy(ScalarTy, VF); if (TTI->getNumberOfParts(VecTy) == VF) continue; for (unsigned I = NextInst; I < MaxInst; ++I) { @@ -13747,7 +13761,7 @@ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; Value *FirstReducedVal = ReducedVals.front(); Type *ScalarTy = FirstReducedVal->getType(); - FixedVectorType *VectorTy = FixedVectorType::get(ScalarTy, ReduxWidth); + FixedVectorType *VectorTy = makeScalarTyToVectorTy(ScalarTy, ReduxWidth); InstructionCost VectorCost = 0, ScalarCost; // If all of the reduced values are constant, the vector cost is 0, since // the reduction value can be calculated at the compile time.