diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -321,36 +321,51 @@ /// Checks if the given value is actually an undefined constant vector. /// Also, if the\p ShuffleMask is not empty, tries to check if the non-masked /// elements actually mask the insertelement buildvector, if any. -static bool isUndefVector(const Value *V, ArrayRef ShuffleMask = None) { - if (isa(V)) - return true; +template +static SmallBitVector isUndefVector(const Value *V, + ArrayRef ShuffleMask = None) { + SmallBitVector Res(ShuffleMask.empty() ? 1 : ShuffleMask.size(), true); + using T = + typename std::conditional::type; + if (isa(V)) + return Res; auto *VecTy = dyn_cast(V->getType()); if (!VecTy) - return false; + return Res.reset(); auto *C = dyn_cast(V); if (!C) { if (!ShuffleMask.empty()) { const Value *Base = V; while (auto *II = dyn_cast(Base)) { Base = II->getOperand(0); + if (isa(II->getOperand(1))) + continue; Optional Idx = getInsertIndex(II); if (!Idx) continue; if (*Idx < ShuffleMask.size() && ShuffleMask[*Idx] == UndefMaskElem) - return false; + Res.reset(*Idx); } - return V != Base && isUndefVector(Base); + // TODO: Add analysis for shuffles here too. 
+ if (V == Base) { + Res.reset(); + } else { + SmallVector SubMask(ShuffleMask.size(), UndefMaskElem); + Res &= isUndefVector(Base, SubMask); + } + } else { + Res.reset(); } - return false; + return Res; } for (unsigned I = 0, E = VecTy->getNumElements(); I != E; ++I) { if (Constant *Elem = C->getAggregateElement(I)) - if (!isa(Elem) && + if (!isa(Elem) && (ShuffleMask.empty() || (I < ShuffleMask.size() && ShuffleMask[I] == UndefMaskElem))) - return false; + Res.reset(ShuffleMask.empty() ? 0 : I); } - return true; + return Res; } /// Checks if the vector of instructions can be represented as a shuffle, like: @@ -420,7 +435,7 @@ return None; auto *Vec = EI->getVectorOperand(); // We can extractelement from undef or poison vector. - if (isUndefVector(Vec)) + if (isUndefVector(Vec).all()) continue; // All vector operands must have the same number of vector elements. if (cast(Vec->getType())->getNumElements() != Size) @@ -1196,7 +1211,7 @@ // Undefs are always profitable for extractelements. if (!Ex2Idx) return LookAheadHeuristics::ScoreConsecutiveExtracts; - if (isUndefVector(EV2) && EV2->getType() == EV1->getType()) + if (isUndefVector(EV2).all() && EV2->getType() == EV1->getType()) return LookAheadHeuristics::ScoreConsecutiveExtracts; if (EV2 == EV1) { int Idx1 = Ex1Idx->getZExtValue(); @@ -4612,7 +4627,7 @@ if (isa(V)) Key = hash_value(Value::UndefValueVal + 1); if (auto *EI = dyn_cast(V)) { - if (!isUndefVector(EI->getVectorOperand()) && + if (!isUndefVector(EI->getVectorOperand()).all() && !isa(EI->getIndexOperand())) SubKey = hash_value(EI->getVectorOperand()); } @@ -6425,8 +6440,9 @@ // initial vector or inserting a subvector. // TODO: Implement the analysis of the FirstInsert->getOperand(0) // subvector of ActualVecTy. 
- if (!isUndefVector(FirstInsert->getOperand(0), InsertMask) && - NumScalars != NumElts && !IsWholeSubvector) { + SmallBitVector InMask = + isUndefVector(FirstInsert->getOperand(0), InsertMask); + if (!InMask.all() && NumScalars != NumElts && !IsWholeSubvector) { if (InsertVecSz != VecSz) { auto *ActualVecTy = FixedVectorType::get(SrcVecTy->getElementType(), VecSz); @@ -6435,13 +6451,13 @@ CostKind, OffsetBeg - Offset, InsertVecTy); } else { for (unsigned I = 0, End = OffsetBeg - Offset; I < End; ++I) - Mask[I] = I; + Mask[I] = InMask.test(I) ? UndefMaskElem : I; for (unsigned I = OffsetBeg - Offset, End = OffsetEnd - Offset; I <= End; ++I) if (Mask[I] != UndefMaskElem) Mask[I] = I + VecSz; for (unsigned I = OffsetEnd + 1 - Offset; I < VecSz; ++I) - Mask[I] = I; + Mask[I] = InMask.test(I) ? UndefMaskElem : I; Cost += TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, InsertVecTy, Mask); } } @@ -7117,14 +7133,15 @@ SmallVector Mask(ShuffleMask.begin()->second); auto VMIt = std::next(ShuffleMask.begin()); T *Prev = nullptr; - bool IsBaseNotUndef = !isUndefVector(Base, Mask); - if (IsBaseNotUndef) { + SmallBitVector IsBaseUndef = isUndefVector(Base, Mask); + if (!IsBaseUndef.all()) { // Base is not undef, need to combine it with the next subvectors. std::pair Res = ResizeAction(ShuffleMask.begin()->first, Mask, /*ForSingleMask=*/false); + SmallBitVector IsBasePoison = isUndefVector(Base, Mask); for (unsigned Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) { if (Mask[Idx] == UndefMaskElem) - Mask[Idx] = Idx; + Mask[Idx] = IsBasePoison.test(Idx) ? UndefMaskElem : Idx; else Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF; } @@ -7180,6 +7197,7 @@ } VMIt = std::next(VMIt); } + bool IsBaseNotUndef = !IsBaseUndef.all(); // Perform requested actions for the remaining masks/vectors. for (auto E = ShuffleMask.end(); VMIt != E; ++VMIt) { // Shuffle other input vectors, if any. 
@@ -8184,25 +8202,36 @@ if (Mask[I] != UndefMaskElem) InsertMask[Offset + I] = I; } - bool IsFirstUndef = isUndefVector(FirstInsert->getOperand(0), InsertMask); - if ((!IsIdentity || Offset != 0 || !IsFirstUndef) && + SmallBitVector IsFirstUndef = + isUndefVector(FirstInsert->getOperand(0), InsertMask); + if ((!IsIdentity || Offset != 0 || !IsFirstUndef.all()) && NumElts != NumScalars) { - if (IsFirstUndef) { + if (IsFirstUndef.all()) { if (!ShuffleVectorInst::isIdentityMask(InsertMask)) { + SmallBitVector IsFirstPoison = + isUndefVector(FirstInsert->getOperand(0), InsertMask); + if (!IsFirstPoison.all()) { + for (unsigned I = 0; I < NumElts; I++) { + if (InsertMask[I] == UndefMaskElem && !IsFirstPoison.test(I)) + InsertMask[I] = I + NumElts; + } + } V = Builder.CreateShuffleVector( - V, InsertMask, cast(E->Scalars.back())->getName()); + V, + IsFirstPoison.all() ? PoisonValue::get(V->getType()) + : FirstInsert->getOperand(0), + InsertMask, cast(E->Scalars.back())->getName()); if (auto *I = dyn_cast(V)) { GatherShuffleExtractSeq.insert(I); CSEBlocks.insert(I->getParent()); } - // Create freeze for undef values. - if (!isa(FirstInsert->getOperand(0))) - V = Builder.CreateFreeze(V); } } else { + SmallBitVector IsFirstPoison = + isUndefVector(FirstInsert->getOperand(0), InsertMask); for (unsigned I = 0; I < NumElts; I++) { if (InsertMask[I] == UndefMaskElem) - InsertMask[I] = I; + InsertMask[I] = IsFirstPoison.test(I) ? 
UndefMaskElem : I; else InsertMask[I] += NumElts; } @@ -8885,8 +8914,8 @@ if (IsIdentityMask(Mask, cast(SV->getType())) || SV->isZeroEltSplat()) break; - bool IsOp1Undef = isUndefVector(SV->getOperand(0), Mask); - bool IsOp2Undef = isUndefVector(SV->getOperand(1), Mask); + bool IsOp1Undef = isUndefVector(SV->getOperand(0), Mask).all(); + bool IsOp2Undef = isUndefVector(SV->getOperand(1), Mask).all(); if (!IsOp1Undef && !IsOp2Undef) break; SmallVector ShuffleMask(SV->getShuffleMask().begin(), @@ -8906,7 +8935,7 @@ &CombineMasks](Value *V1, Value *V2, ArrayRef Mask) -> Value * { assert(V1 && "Expected at least one vector value."); - if (V2 && !isUndefVector(V2, Mask)) { + if (V2 && !isUndefVector(V2, Mask).all()) { // Peek through shuffles. Value *Op1 = V1; Value *Op2 = V2; @@ -8934,8 +8963,8 @@ if (SV1->getOperand(0)->getType() == SV2->getOperand(0)->getType() && SV1->getOperand(0)->getType() != SV1->getType() && - isUndefVector(SV1->getOperand(1), CombinedMask1) && - isUndefVector(SV2->getOperand(1), CombinedMask2)) { + isUndefVector(SV1->getOperand(1), CombinedMask1).all() && + isUndefVector(SV2->getOperand(1), CombinedMask2).all()) { Op1 = SV1->getOperand(0); Op2 = SV2->getOperand(0); SmallVector ShuffleMask1(SV1->getShuffleMask().begin(), diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll @@ -28,7 +28,7 @@ ; CHECK-NEXT: [[LD4:%.*]] = load float, float* [[GEP4]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = 
insertelement <4 x float> [[TMP5]], float [[LD4]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[LD1]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[LD1]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll @@ -11,8 +11,7 @@ ; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float> ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = freeze <8 x i32> [[VECINS_I_5_I1]] +; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll @@ -5,7 +5,7 @@ ; CHECK-LABEL: @b( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float 0x7FF8000000000000, i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> , <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0x7FF8000000000000, i32 3 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP2]], <4 x float> zeroinitializer, <4 x float> zeroinitializer) ; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x float> [[TMP3]], diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -339,9 +339,8 @@ ; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP19:%.*]] = freeze <4 x float> [[RD1]] -; CHECK-NEXT: ret <4 x float> [[TMP19]] +; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[RD1]] ; %c0 = extractelement <4 x i32> %c, i32 0 %c1 = extractelement <4 x i32> %c, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -23,7 +23,7 @@ ; CHECK-NEXT: [[TMP4:%.*]] = lshr <8 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <16 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SHR_4_I_I]], i32 5 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SHR_5_I_I]], i32 6 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[SHR_6_I_I]], i32 7 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll @@ -17,7 +17,7 @@ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, <2 x ptr> [[TMP5]], <2 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG]], i32 3 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x ptr> [[TMP6]], <2 x ptr> poison, <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x ptr> [[TMP7]], <4 x ptr> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x ptr> [[TMP7]], <4 x ptr> [[TMP8]], <4 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x ptr> [[TMP9]], ptr [[ARG_1]], i32 2 ; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP10]], i32 8, <4 x i1> , <4 x i64> poison) ; CHECK-NEXT: br label [[JOIN]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll @@ -178,9 +178,9 @@ ; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[TMP6]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <8 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = 
shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i32> [[TMP12]], <8 x i32> [[TMP13]], <8 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[ARRAYIDX2]] to <8 x i32>* diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll @@ -44,7 +44,7 @@ ; CHECK-NEXT: [[T67:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T32]], i32 2 ; CHECK-NEXT: [[T68:%.*]] = insertelement <8 x i32> [[T67]], i32 [[T49]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP7]], <8 x i32> +; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP7]], <8 x i32> ; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6 ; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7 ; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]],