diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -319,20 +319,34 @@ } /// Checks if the given value is actually an undefined constant vector. -static bool isUndefVector(const Value *V) { +static bool isUndefVector(const Value *V, ArrayRef ShuffleMask = None) { if (isa(V)) return true; - auto *C = dyn_cast(V); - if (!C) - return false; - if (!C->containsUndefOrPoisonElement()) - return false; - auto *VecTy = dyn_cast(C->getType()); + auto *VecTy = dyn_cast(V->getType()); if (!VecTy) return false; + auto *C = dyn_cast(V); + if (!C) { + if (!ShuffleMask.empty()) { + while (auto *II = dyn_cast(V)) { + if (!II->hasOneUse()) + return false; + V = II->getOperand(0); + Optional Idx = getInsertIndex(II); + if (!Idx) + continue; + if (*Idx < ShuffleMask.size() && ShuffleMask[*Idx] == UndefMaskElem) + return false; + } + return isUndefVector(V); + } + return false; + } for (unsigned I = 0, E = VecTy->getNumElements(); I != E; ++I) { if (Constant *Elem = C->getAggregateElement(I)) - if (!isa(Elem)) + if (!isa(Elem) && + (ShuffleMask.empty() || + (I < ShuffleMask.size() && ShuffleMask[I] == UndefMaskElem))) return false; } return true; @@ -6360,8 +6374,10 @@ // initial vector or inserting a subvector. // TODO: Implement the analysis of the FirstInsert->getOperand(0) // subvector of ActualVecTy. - if (!isUndefVector(FirstInsert->getOperand(0)) && NumScalars != NumElts && - !IsWholeSubvector) { + SmallVector InsertMask(NumElts, UndefMaskElem); + copy(Mask, std::next(InsertMask.begin(), OffsetBeg)); + if (!isUndefVector(FirstInsert->getOperand(0), InsertMask) && + NumScalars != NumElts && !IsWholeSubvector) { if (InsertVecSz != VecSz) { auto *ActualVecTy = FixedVectorType::get(SrcVecTy->getElementType(), VecSz); @@ -7056,7 +7072,7 @@ SmallVector Mask(ShuffleMask.begin()->second); auto VMIt = std::next(ShuffleMask.begin()); T *Prev = nullptr; - bool IsBaseNotUndef = !isUndefVector(Base); + bool IsBaseNotUndef = !isUndefVector(Base, Mask); if (IsBaseNotUndef) { // Base is not undef, need to combine it with the next subvectors. std::pair Res = @@ -8106,14 +8122,16 @@ } } - if ((!IsIdentity || Offset != 0 || - !isUndefVector(FirstInsert->getOperand(0))) && - NumElts != NumScalars) { - SmallVector InsertMask(NumElts); - std::iota(InsertMask.begin(), InsertMask.end(), 0); + SmallVector InsertMask(NumElts, UndefMaskElem); + for (unsigned I = 0; I < NumElts; I++) { + if (Mask[I] != UndefMaskElem) + InsertMask[Offset + I] = NumElts + I; + } + if (Offset != 0 || + !isUndefVector(FirstInsert->getOperand(0), InsertMask)) { for (unsigned I = 0; I < NumElts; I++) { - if (Mask[I] != UndefMaskElem) - InsertMask[Offset + I] = NumElts + I; + if (InsertMask[I] == UndefMaskElem) + InsertMask[I] = I; } V = Builder.CreateShuffleVector( @@ -8792,8 +8810,8 @@ if (IsIdentityMask(Mask, cast(SV->getType())) || SV->isZeroEltSplat()) break; - bool IsOp1Undef = isUndefVector(SV->getOperand(0)); - bool IsOp2Undef = isUndefVector(SV->getOperand(1)); + bool IsOp1Undef = isUndefVector(SV->getOperand(0), Mask); + bool IsOp2Undef = isUndefVector(SV->getOperand(1), Mask); if (!IsOp1Undef && !IsOp2Undef) break; SmallVector ShuffleMask(SV->getShuffleMask().begin(), @@ -8813,7 +8831,7 @@ &CombineMasks](Value *V1, Value *V2, ArrayRef Mask) -> Value * { assert(V1 && "Expected at least one vector value."); - if (V2 && !isUndefVector(V2)) { + if (V2 && !isUndefVector(V2, Mask)) { // Peek through shuffles. Value *Op1 = V1; Value *Op2 = V2; @@ -8841,8 +8859,8 @@ if (SV1->getOperand(0)->getType() == SV2->getOperand(0)->getType() && SV1->getOperand(0)->getType() != SV1->getType() && - isUndefVector(SV1->getOperand(1)) && - isUndefVector(SV2->getOperand(1))) { + isUndefVector(SV1->getOperand(1), CombinedMask1) && + isUndefVector(SV2->getOperand(1), CombinedMask2)) { Op1 = SV1->getOperand(0); Op2 = SV2->getOperand(0); SmallVector ShuffleMask1(SV1->getShuffleMask().begin(), diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll @@ -12,7 +12,7 @@ ; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0 ; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> ; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll @@ -53,11 +53,9 @@ ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP7]], <2 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP9]], <2 x i32> -; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[TMP8]], 0 -; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[TMP10]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[TMP7]], 0 +; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[TMP8]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } zeroinitializer ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -44,8 +44,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]] ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <8 x i32> -; CHECK-NEXT: [[RD1:%.*]] = shufflevector <8 x float> undef, <8 x float> [[TMP3]], <8 x i32> -; CHECK-NEXT: ret <8 x float> [[RD1]] +; CHECK-NEXT: ret <8 x float> [[TMP3]] ; %c0 = extractelement <4 x i32> %c, i32 0 %c1 = extractelement <4 x i32> %c, i32 1