diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -8176,21 +8176,37 @@ SmallVector InsertMask(NumElts, UndefMaskElem); for (unsigned I = 0; I < NumElts; I++) { if (Mask[I] != UndefMaskElem) - InsertMask[Offset + I] = NumElts + I; - } - if (Offset != 0 || - !isUndefVector(FirstInsert->getOperand(0), InsertMask)) { - for (unsigned I = 0; I < NumElts; I++) { - if (InsertMask[I] == UndefMaskElem) - InsertMask[I] = I; - } - - V = Builder.CreateShuffleVector( - FirstInsert->getOperand(0), V, InsertMask, - cast(E->Scalars.back())->getName()); - if (auto *I = dyn_cast(V)) { - GatherShuffleSeq.insert(I); - CSEBlocks.insert(I->getParent()); + InsertMask[Offset + I] = I; + } + bool IsFirstUndef = isUndefVector(FirstInsert->getOperand(0), InsertMask); + if ((!IsIdentity || Offset != 0 || !IsFirstUndef) && + NumElts != NumScalars) { + if (IsFirstUndef) { + if (!ShuffleVectorInst::isIdentityMask(InsertMask)) { + V = Builder.CreateShuffleVector( + V, InsertMask, cast(E->Scalars.back())->getName()); + if (auto *I = dyn_cast(V)) { + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } + // Create freeze for undef values. + if (!isa(FirstInsert->getOperand(0))) + V = Builder.CreateFreeze(V); + } + } else { + for (unsigned I = 0; I < NumElts; I++) { + if (InsertMask[I] == UndefMaskElem) + InsertMask[I] = I; + else + InsertMask[I] += NumElts; + } + V = Builder.CreateShuffleVector( + FirstInsert->getOperand(0), V, InsertMask, + cast(E->Scalars.back())->getName()); + if (auto *I = dyn_cast(V)) { + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } } } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll @@ -11,7 +11,7 @@ ; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float> ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> poison, <8 x i32> [[TMP6]], <8 x i32> +; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll @@ -11,7 +11,8 @@ ; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float> ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> undef, <8 x i32> [[TMP6]], <8 x i32> +; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = freeze <8 x i32> [[VECINS_I_5_I1]] ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll @@ -305,7 +305,7 @@ ; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP18]], <4 x i32> +; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[RD1]] ; %c0 = extractelement <4 x i32> %c, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -339,8 +339,9 @@ ; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP18]], <4 x i32> -; CHECK-NEXT: ret <4 x float> [[RD1]] +; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = freeze <4 x float> [[RD1]] +; CHECK-NEXT: ret <4 x float> [[TMP19]] ; %c0 = extractelement <4 x i32> %c, i32 0 %c1 = extractelement <4 x i32> %c, i32 1