diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6668,6 +6668,23 @@ llvm_unreachable("Two different buildvectors not expected."); } +namespace { +/// Returns incoming Value *, if the requested type is Value * too, or a default +/// value, otherwise. +struct ValueSelect { + template + static typename std::enable_if::value, Value *>::type + get(Value *V) { + return V; + } + template + static typename std::enable_if::value, U>::type + get(Value *) { + return U(); + } +}; +} // namespace + /// Does the analysis of the provided shuffle masks and performs the requested /// actions on the vectors with the given shuffle masks. It tries to do it in /// several steps. @@ -6700,6 +6717,9 @@ else Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF; } + auto *V = ValueSelect::get(Base); + assert((!V || GetVF(V) == Mask.size()) && + "Expected base vector of VF number of elements."); Prev = Action(Mask, {nullptr, Res.first}); } else if (ShuffleMask.size() == 1) { // Base is undef and only 1 vector is shuffled - perform the action only for @@ -8105,6 +8125,17 @@ return vectorizeTree(ExternallyUsedValues); } +namespace { +/// Data type for handling buildvector sequences with the reused scalars from +/// other tree entries. +struct ShuffledInsertData { + /// List of insertelements to be replaced by shuffles. + SmallVector InsertElements; + /// The parent vectors and shuffle mask for the given list of inserts. + MapVector> ValueMasks; +}; +} // namespace + Value * BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { // All blocks must be scheduled before any instructions are inserted. @@ -8138,6 +8169,9 @@ LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n"); + SmallVector ShuffledInserts; + // Maps vector instruction to original insertelement instruction + DenseMap VectorToInsertElement; // Extract all of the elements with the external uses. for (const auto &ExternalUse : ExternalUses) { Value *Scalar = ExternalUse.Scalar; @@ -8177,6 +8211,8 @@ assert(isa(Scalar->getType()) && isa(Scalar) && "In-tree scalar of vector type is not insertelement?"); + auto *IE = cast(Scalar); + VectorToInsertElement.try_emplace(Vec, IE); return Vec; }; // If User == nullptr, the Scalar is used as extra arg. Generate @@ -8205,6 +8241,64 @@ continue; } + if (auto *VU = dyn_cast(User)) { + if (!Scalar->getType()->isVectorTy()) { + if (auto *FTy = dyn_cast(User->getType())) { + Optional InsertIdx = getInsertIndex(VU); + if (InsertIdx) { + auto *It = + find_if(ShuffledInserts, [VU](const ShuffledInsertData &Data) { + // Checks if 2 insertelements are from the same buildvector. + InsertElementInst *VecInsert = Data.InsertElements.front(); + return areTwoInsertFromSameBuildVector(VU, VecInsert); + }); + unsigned Idx = *InsertIdx; + if (It == ShuffledInserts.end()) { + (void)ShuffledInserts.emplace_back(); + It = std::next(ShuffledInserts.begin(), + ShuffledInserts.size() - 1); + SmallVectorImpl &Mask = It->ValueMasks[Vec]; + if (Mask.empty()) + Mask.assign(FTy->getNumElements(), UndefMaskElem); + // Find the insertvector, vectorized in tree, if any. + Value *Base = VU; + while (auto *IEBase = dyn_cast(Base)) { + if (IEBase != User && + (!IEBase->hasOneUse() || + getInsertIndex(IEBase).getValueOr(Idx) == Idx)) + break; + // Build the mask for the vectorized insertelement instructions. + if (const TreeEntry *E = getTreeEntry(IEBase)) { + do { + IEBase = cast(Base); + int IEIdx = *getInsertIndex(IEBase); + assert(Mask[Idx] == UndefMaskElem && + "InsertElementInstruction used already."); + Mask[IEIdx] = IEIdx; + Base = IEBase->getOperand(0); + } while (E == getTreeEntry(Base)); + break; + } + Base = cast(Base)->getOperand(0); + // After the vectorization the def-use chain has changed, need + // to look through original insertelement instructions, if they + // get replaced by vector instructions. + auto It = VectorToInsertElement.find(Base); + if (It != VectorToInsertElement.end()) + Base = It->second; + } + } + SmallVectorImpl &Mask = It->ValueMasks[Vec]; + if (Mask.empty()) + Mask.assign(FTy->getNumElements(), UndefMaskElem); + Mask[Idx] = ExternalUse.Lane; + It->InsertElements.push_back(cast(User)); + continue; + } + } + } + } + // Generate extracts for out-of-tree users. // Find the insertion point for the extractelement lane. if (auto *VecI = dyn_cast(Vec)) { @@ -8240,6 +8334,219 @@ LLVM_DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n"); } + // Checks if the mask is an identity mask. + auto &&IsIdentityMask = [](ArrayRef Mask, FixedVectorType *VecTy) { + int Limit = Mask.size(); + return VecTy->getNumElements() == Mask.size() && + all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) && + ShuffleVectorInst::isIdentityMask(Mask); + }; + // Tries to combine 2 different masks into single one. + auto &&CombineMasks = [](SmallVectorImpl &Mask, ArrayRef ExtMask) { + SmallVector NewMask(ExtMask.size(), UndefMaskElem); + for (int I = 0, Sz = ExtMask.size(); I < Sz; ++I) { + if (ExtMask[I] == UndefMaskElem) + continue; + NewMask[I] = Mask[ExtMask[I]]; + } + Mask.swap(NewMask); + }; + // Peek through shuffles, trying to simplify the final shuffle code. + auto &&PeekThroughShuffles = + [&IsIdentityMask, &CombineMasks](Value *&V, SmallVectorImpl &Mask, + bool CheckForLengthChange = false) { + while (auto *SV = dyn_cast(V)) { + // Exit if not a fixed vector type or changing size shuffle. + if (!isa(SV->getType()) || + (CheckForLengthChange && SV->changesLength())) + break; + // Exit if the identity or broadcast mask is found. + if (IsIdentityMask(Mask, cast(SV->getType())) || + SV->isZeroEltSplat()) + break; + bool IsOp1Undef = isUndefVector(SV->getOperand(0)); + bool IsOp2Undef = isUndefVector(SV->getOperand(1)); + if (!IsOp1Undef && !IsOp2Undef) + break; + SmallVector ShuffleMask(SV->getShuffleMask().begin(), + SV->getShuffleMask().end()); + CombineMasks(ShuffleMask, Mask); + Mask.swap(ShuffleMask); + if (IsOp2Undef) + V = SV->getOperand(0); + else + V = SV->getOperand(1); + } + }; + // Smart shuffle instruction emission, walks through shuffles trees and + // tries to find the best matching vector for the actual shuffle + // instruction. + auto &&CreateShuffle = [this, &IsIdentityMask, &PeekThroughShuffles, + &CombineMasks](Value *V1, Value *V2, + ArrayRef Mask) -> Value * { + assert(V1 && "Expected at least one vector value."); + if (V2 && !isUndefVector(V2)) { + // Peek through shuffles. + Value *Op1 = V1; + Value *Op2 = V2; + int VF = + cast(V1->getType())->getElementCount().getKnownMinValue(); + SmallVector CombinedMask1(Mask.size(), UndefMaskElem); + SmallVector CombinedMask2(Mask.size(), UndefMaskElem); + for (int I = 0, E = Mask.size(); I < E; ++I) { + if (Mask[I] < VF) + CombinedMask1[I] = Mask[I]; + else + CombinedMask2[I] = Mask[I] - VF; + } + Value *PrevOp1; + Value *PrevOp2; + do { + PrevOp1 = Op1; + PrevOp2 = Op2; + PeekThroughShuffles(Op1, CombinedMask1, /*CheckForLengthChange=*/true); + PeekThroughShuffles(Op2, CombinedMask2, /*CheckForLengthChange=*/true); + // Check if we have 2 resizing shuffles - need to peek through operands + // again. + if (auto *SV1 = dyn_cast(Op1)) + if (auto *SV2 = dyn_cast(Op2)) + if (SV1->getOperand(0)->getType() == + SV2->getOperand(0)->getType() && + SV1->getOperand(0)->getType() != SV1->getType() && + isUndefVector(SV1->getOperand(1)) && + isUndefVector(SV2->getOperand(1))) { + Op1 = SV1->getOperand(0); + Op2 = SV2->getOperand(0); + SmallVector ShuffleMask1(SV1->getShuffleMask().begin(), + SV1->getShuffleMask().end()); + CombineMasks(ShuffleMask1, CombinedMask1); + CombinedMask1.swap(ShuffleMask1); + SmallVector ShuffleMask2(SV2->getShuffleMask().begin(), + SV2->getShuffleMask().end()); + CombineMasks(ShuffleMask2, CombinedMask2); + CombinedMask2.swap(ShuffleMask2); + } + } while (PrevOp1 != Op1 || PrevOp2 != Op2); + VF = cast(Op1->getType()) + ->getElementCount() + .getKnownMinValue(); + for (int I = 0, E = Mask.size(); I < E; ++I) { + if (CombinedMask2[I] != UndefMaskElem) { + assert(CombinedMask1[I] == UndefMaskElem && + "Expected undefined mask element"); + CombinedMask1[I] = CombinedMask2[I] + (Op1 == Op2 ? 0 : VF); + } + } + Value *Vec = Builder.CreateShuffleVector( + Op1, Op1 == Op2 ? PoisonValue::get(Op1->getType()) : Op2, + CombinedMask1); + if (auto *I = dyn_cast(Vec)) { + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } + return Vec; + } + if (isa(V1)) + return PoisonValue::get(FixedVectorType::get( + cast(V1->getType())->getElementType(), Mask.size())); + Value *Op = V1; + SmallVector CombinedMask(Mask.begin(), Mask.end()); + PeekThroughShuffles(Op, CombinedMask); + if (!isa(Op->getType()) || + !IsIdentityMask(CombinedMask, cast(Op->getType()))) { + Value *Vec = Builder.CreateShuffleVector(Op, CombinedMask); + if (auto *I = dyn_cast(Vec)) { + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } + return Vec; + } + return Op; + }; + + auto &&ResizeToVF = [&CreateShuffle](Value *Vec, ArrayRef Mask) { + unsigned VF = Mask.size(); + unsigned VecVF = cast(Vec->getType())->getNumElements(); + if (VF != VecVF) { + if (any_of(Mask, [VF](int Idx) { return Idx >= static_cast(VF); })) { + Vec = CreateShuffle(Vec, nullptr, Mask); + return std::make_pair(Vec, true); + } + SmallVector ResizeMask(VF, UndefMaskElem); + for (unsigned I = 0; I < VF; ++I) { + if (Mask[I] != UndefMaskElem) + ResizeMask[Mask[I]] = Mask[I]; + } + Vec = CreateShuffle(Vec, nullptr, ResizeMask); + } + + return std::make_pair(Vec, false); + }; + // Perform shuffling of the vectorize tree entries for better handling of + // external extracts. + for (int I = 0, E = ShuffledInserts.size(); I < E; ++I) { + // Find the first and the last instruction in the list of insertelements. + sort(ShuffledInserts[I].InsertElements, isFirstInsertElement); + InsertElementInst *FirstInsert = ShuffledInserts[I].InsertElements.front(); + InsertElementInst *LastInsert = ShuffledInserts[I].InsertElements.back(); + Builder.SetInsertPoint(LastInsert); + auto Vector = ShuffledInserts[I].ValueMasks.takeVector(); + Value *NewInst = performExtractsShuffleAction( + makeMutableArrayRef(Vector.data(), Vector.size()), + FirstInsert->getOperand(0), + [](Value *Vec) { + return cast(Vec->getType()) + ->getElementCount() + .getKnownMinValue(); + }, + ResizeToVF, + [FirstInsert, &CreateShuffle](ArrayRef Mask, + ArrayRef Vals) { + assert((Vals.size() == 1 || Vals.size() == 2) && + "Expected exactly 1 or 2 input values."); + if (Vals.size() == 1) { + // Do not create shuffle if the mask is a simple identity + // non-resizing mask. + if (Mask.size() != cast(Vals.front()->getType()) + ->getNumElements() || + !ShuffleVectorInst::isIdentityMask(Mask)) + return CreateShuffle(Vals.front(), nullptr, Mask); + return Vals.front(); + } + return CreateShuffle(Vals.front() ? Vals.front() + : FirstInsert->getOperand(0), + Vals.back(), Mask); + }); + auto It = ShuffledInserts[I].InsertElements.rbegin(); + // Rebuild buildvector chain. + InsertElementInst *II = nullptr; + if (It != ShuffledInserts[I].InsertElements.rend()) + II = *It; + SmallVector Inserts; + while (It != ShuffledInserts[I].InsertElements.rend()) { + assert(II && "Must be an insertelement instruction."); + if (*It == II) + ++It; + else + Inserts.push_back(cast(II)); + II = dyn_cast(II->getOperand(0)); + } + for (Instruction *II : reverse(Inserts)) { + II->replaceUsesOfWith(II->getOperand(0), NewInst); + if (auto *NewI = dyn_cast(NewInst)) + if (II->getParent() == NewI->getParent() && II->comesBefore(NewI)) + II->moveAfter(NewI); + NewInst = II; + } + LastInsert->replaceAllUsesWith(NewInst); + for (InsertElementInst *IE : reverse(ShuffledInserts[I].InsertElements)) { + IE->replaceUsesOfWith(IE->getOperand(1), + PoisonValue::get(IE->getOperand(1)->getType())); + eraseInstruction(IE); + } + CSEBlocks.insert(LastInsert->getParent()); + } + // For each vectorized value: for (auto &TEPtr : VectorizableTree) { TreeEntry *Entry = TEPtr.get(); diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll @@ -1312,19 +1312,19 @@ ; CHECK-NEXT: [[TMP58:%.*]] = shl nsw <16 x i32> [[TMP57]], ; CHECK-NEXT: [[TMP59:%.*]] = add nsw <16 x i32> [[TMP58]], [[TMP56]] ; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> undef, <16 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP62:%.*]] = add nsw <16 x i32> [[TMP60]], [[TMP61]] ; CHECK-NEXT: [[TMP63:%.*]] = sub nsw <16 x i32> [[TMP60]], [[TMP61]] ; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> -; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> +; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP66:%.*]] = add nsw <16 x i32> [[TMP64]], [[TMP65]] ; CHECK-NEXT: [[TMP67:%.*]] = sub nsw <16 x i32> [[TMP64]], [[TMP65]] ; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> -; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> +; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <16 x i32> [[TMP68]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP70:%.*]] = add nsw <16 x i32> [[TMP68]], [[TMP69]] ; CHECK-NEXT: [[TMP71:%.*]] = sub nsw <16 x i32> [[TMP68]], [[TMP69]] ; CHECK-NEXT: [[TMP72:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> -; CHECK-NEXT: [[TMP73:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> +; CHECK-NEXT: [[TMP73:%.*]] = shufflevector <16 x i32> [[TMP72]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP74:%.*]] = add nsw <16 x i32> [[TMP72]], [[TMP73]] ; CHECK-NEXT: [[TMP75:%.*]] = sub nsw <16 x i32> [[TMP72]], [[TMP73]] ; CHECK-NEXT: [[TMP76:%.*]] = shufflevector <16 x i32> [[TMP74]], <16 x i32> [[TMP75]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll @@ -139,7 +139,7 @@ ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll @@ -139,7 +139,7 @@ ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll @@ -26,19 +26,15 @@ ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[GEP3]] to <2 x float>* ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP6]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP14]], i32 3 -; CHECK-NEXT: [[TMP16:%.*]] = fmul fast <4 x float> [[TMP9]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[GEP0]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP16]], <4 x float>* [[TMP17]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP6]], <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <4 x float> [[TMP9]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast float* [[GEP0]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP12]], <4 x float>* [[TMP13]], align 4 ; CHECK-NEXT: ret void ; %gep0 = getelementptr inbounds float, float* %a, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll @@ -10,12 +10,9 @@ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0 ; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> ; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4 ; CHECK-NEXT: ret void ; @@ -46,14 +43,10 @@ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 -; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT_I5_I10:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 -; CHECK-NEXT: [[DOTSROA_0_4_VEC_INSERT_I10_I13:%.*]] = insertelement <2 x float> [[DOTSROA_0_0_VEC_INSERT_I5_I10]], float [[TMP9]], i64 1 -; CHECK-NEXT: store <2 x float> [[DOTSROA_0_4_VEC_INSERT_I10_I13]], ptr null, align 4 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[DOTSROA_0_4_VEC_INSERT_I10_I13_2:%.*]] = insertelement <2 x float> [[DOTSROA_0_0_VEC_INSERT_I5_I10]], float [[TMP10]], i64 1 -; CHECK-NEXT: store <2 x float> [[DOTSROA_0_4_VEC_INSERT_I10_I13_2]], ptr null, align 4 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP8]], ptr null, align 4 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP9]], ptr null, align 4 ; CHECK-NEXT: ret void ; %1 = getelementptr inbounds float, ptr undef, i32 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll @@ -4,13 +4,12 @@ define void @b() { ; CHECK-LABEL: @b( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float 0x7FF8000000000000, i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> , <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0x7FF8000000000000, i32 3 -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP2]], <4 x float> zeroinitializer, <4 x float> zeroinitializer) -; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x float> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = fdiv <4 x float> [[TMP4]], zeroinitializer -; CHECK-NEXT: store <4 x float> [[TMP5]], ptr undef, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> , <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP1]], <4 x float> zeroinitializer, <4 x float> zeroinitializer) +; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = fdiv <4 x float> [[TMP3]], zeroinitializer +; CHECK-NEXT: store <4 x float> [[TMP4]], ptr undef, align 4 ; CHECK-NEXT: ret void ; entry: @@ -50,8 +49,7 @@ ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 -; CHECK-NEXT: [[AGG:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[LOOP]] ; entry: @@ -71,7 +69,6 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[DOTSROA_025_4_VEC_INSERT_US_I:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0 ; CHECK-NEXT: br label [[LOOP]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll @@ -52,16 +52,12 @@ ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP6]], i32 0 -; CHECK-NEXT: [[RETVAL_SROA_0_0_VEC_INSERT4:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP7]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP6]], i32 1 -; CHECK-NEXT: [[RETVAL_SROA_0_4_VEC_INSERT7:%.*]] = insertelement <2 x float> [[RETVAL_SROA_0_0_VEC_INSERT4]], float [[TMP8]], i64 1 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[TMP6]], i32 2 -; CHECK-NEXT: [[RETVAL_SROA_7_8_VEC_INSERT11:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP6]], i32 3 -; CHECK-NEXT: [[RETVAL_SROA_7_12_VEC_INSERT13:%.*]] = insertelement <2 x float> [[RETVAL_SROA_7_8_VEC_INSERT11]], float [[TMP10]], i64 1 -; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[RETVAL_SROA_0_4_VEC_INSERT7]], 0 -; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[RETVAL_SROA_7_12_VEC_INSERT13]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP7]], <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP9]], <2 x i32> +; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[TMP8]], 0 +; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[TMP10]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } zeroinitializer ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll @@ -13,21 +13,19 @@ ; CHECK-NEXT: [[RANGE20_I:%.*]] = getelementptr inbounds [[STRUCT_CLZMADEC_1_28_55_82_103_124_145_166_181_196_229_259_334:%.*]], %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* [[P:%.*]], i64 0, i32 4 ; CHECK-NEXT: br label [[DO_BODY66_I:%.*]] ; CHECK: do.body66.i: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP5:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> , i32 [[TMP2]], i32 1 ; CHECK-NEXT: br i1 undef, label [[DO_COND_I]], label [[IF_ELSE_I:%.*]] ; CHECK: if.else.i: -; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP1]], undef +; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i32> [[TMP1]], undef ; CHECK-NEXT: br label [[DO_COND_I]] ; CHECK: do.cond.i: -; CHECK-NEXT: [[TMP5]] = phi <2 x i32> [ [[TMP4]], [[IF_ELSE_I]] ], [ [[TMP3]], [[DO_BODY66_I]] ] +; CHECK-NEXT: [[TMP3]] = phi <2 x i32> [ [[TMP2]], [[IF_ELSE_I]] ], [ [[TMP1]], [[DO_BODY66_I]] ] ; CHECK-NEXT: br i1 undef, label [[DO_BODY66_I]], label [[DO_END1006_I:%.*]] ; CHECK: do.end1006.i: -; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[RANGE20_I]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP6]], <2 x i32>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[RANGE20_I]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll @@ -36,17 +36,15 @@ ; CHECK: if.then325: ; CHECK-NEXT: br label [[IF_END327]] ; CHECK: if.end327: -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> , float [[TMP4]], i32 0 ; CHECK-NEXT: br i1 undef, label [[IF_THEN329:%.*]], label [[IF_END332]] ; CHECK: if.then329: ; CHECK-NEXT: br label [[IF_END332]] ; CHECK: if.end332: -; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x float> [ [[TMP5]], [[IF_THEN329]] ], [ [[TMP5]], [[IF_END327]] ], [ , [[IF_THEN291]] ] -; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x float> [[TMP3]], [[TMP6]] +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x float> [ [[TMP1]], [[IF_THEN329]] ], [ [[TMP1]], [[IF_END327]] ], [ , [[IF_THEN291]] ] +; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x float> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[ARRAYIDX_I_I606:%.*]] = getelementptr inbounds [[CLASS_BTVECTOR3_23_221_463_485_507_573_595_683_727_749_815_837_991_1585_1607_1629_1651_1849_2047_2069_2091_2113:%.*]], %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* [[VERTICES:%.*]], i64 0, i32 0, i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[ARRAYIDX_I_I606]] to <2 x float>* -; CHECK-NEXT: store <2 x float> [[TMP7]], <2 x float>* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_I_I606]] to <2 x float>* +; CHECK-NEXT: store <2 x float> [[TMP5]], <2 x float>* [[TMP6]], align 4 ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll @@ -32,7 +32,7 @@ ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP11]], i32 1 ; CHECK-NEXT: [[ADD13]] = fadd float [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[ADD13]], i32 1 ; CHECK-NEXT: [[TMP16:%.*]] = fcmp olt <2 x float> [[TMP15]], ; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x float> [[TMP15]], <2 x float> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll @@ -35,7 +35,7 @@ ; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP11]], [[TMP9]] ; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP6]], <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[TMP14]], undef ; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [ ; CHECK-NEXT: i32 0, label [[BB2:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll @@ -129,11 +129,10 @@ ; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds double, double* [[INBUF:%.*]], i64 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[INBUF]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll @@ -20,18 +20,15 @@ ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 -; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 -; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]]) +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 -; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i32 0 -; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]]) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll @@ -20,18 +20,15 @@ ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 -; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> undef, double [[TMP5]], i32 0 -; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]]) +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 -; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> undef, double [[TMP6]], i32 0 -; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]]) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll @@ -115,16 +115,15 @@ ; CHECK-NEXT: br i1 undef, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]] ; CHECK: if.then38: ; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY_5_11_53_95_137_191_197_203_239_257_263_269_275_281_287_293_383_437_443_455_461_599_601:%.*]], %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 0 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> , double undef, i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> undef, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> undef, [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = fmul <2 x double> undef, undef +; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> undef, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> undef, [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> undef, [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> undef, [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> undef, [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> undef, [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> undef, [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8 ; CHECK-NEXT: br label [[RETURN:%.*]] ; CHECK: if.then78: ; CHECK-NEXT: br label [[RETURN]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll @@ -82,23 +82,19 @@ ; CHECK-NEXT: ret float [[ADD]] ; ; THRESH1-LABEL: @f_used_twice_in_tree( -; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 -; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; THRESH1-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 -; THRESH1-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]] -; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 -; THRESH1-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 -; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]] +; THRESH1-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> +; THRESH1-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]] +; THRESH1-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; THRESH1-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] ; THRESH1-NEXT: ret float [[ADD]] ; ; THRESH2-LABEL: @f_used_twice_in_tree( -; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 -; THRESH2-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; THRESH2-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 -; THRESH2-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]] -; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 -; THRESH2-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 -; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]] +; THRESH2-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> +; THRESH2-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]] +; THRESH2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] ; THRESH2-NEXT: ret float [[ADD]] ; %x0 = extractelement <2 x float> %x, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -896,12 +896,11 @@ ; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 1 ; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[MUL]], i32 0 ; THRESHOLD-NEXT: [[TMP5:%.*]] = sitofp <2 x i32> [[TMP4]] to <2 x float> -; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0 -; THRESHOLD-NEXT: [[TMP7:%.*]] = insertelement <2 x float> , float [[TMP6]], i32 0 -; THRESHOLD-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[TMP5]], [[TMP7]] -; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0 -; THRESHOLD-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1 -; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP9]], [[TMP10]] +; THRESHOLD-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> , <2 x float> [[TMP5]], <2 x i32> +; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[TMP5]], [[TMP6]] +; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0 +; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP7]], i32 1 +; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP8]], [[TMP9]] ; THRESHOLD-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[TMP2]], [[OP_RDX2]] ; THRESHOLD-NEXT: ret float [[OP_RDX3]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -1100,15 +1100,14 @@ ; THRESH-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] ; THRESH-NEXT: [[TMP11:%.*]] = insertelement <2 x i1> poison, i1 [[OP_RDX]], i32 0 ; THRESH-NEXT: [[TMP12:%.*]] = insertelement <2 x i1> [[TMP11]], i1 [[TMP5]], i32 1 -; THRESH-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0 -; THRESH-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP3]], i32 1 -; THRESH-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0 -; THRESH-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP4]], i32 1 -; THRESH-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP14]], <2 x i32> [[TMP16]] -; THRESH-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP17]], i32 0 -; THRESH-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1 -; THRESH-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; THRESH-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP18]], i32 [[TMP19]] +; THRESH-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP2]], <2 x i32> +; THRESH-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <2 x i32> +; THRESH-NEXT: [[TMP15:%.*]] = shufflevector <2 x i32> [[TMP14]], <2 x i32> [[TMP2]], <2 x i32> +; THRESH-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP15]] +; THRESH-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0 +; THRESH-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1 +; THRESH-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +; THRESH-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP17]], i32 [[TMP18]] ; THRESH-NEXT: [[OP_RDX4:%.*]] = icmp sgt i32 [[TMP8]], [[OP_RDX3]] ; THRESH-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[TMP8]], i32 [[OP_RDX3]] ; THRESH-NEXT: ret i32 [[OP_RDX5]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll @@ -439,12 +439,9 @@ ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @reschedule_extract( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -477,12 +474,9 @@ ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @take_credit( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -539,12 +533,9 @@ ; NOTHRESHOLD-NEXT: ret <8 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @_vadd256( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[B:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <8 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <8 x float> [[TMP5]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <8 x float> [[TMP2]] ; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %b, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -474,12 +474,9 @@ ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @reschedule_extract( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -512,12 +509,9 @@ ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @take_credit( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -574,12 +568,9 @@ ; NOTHRESHOLD-NEXT: ret <8 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @_vadd256( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[B:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <8 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <8 x float> [[TMP5]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <8 x float> [[TMP2]] ; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %b, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll @@ -19,16 +19,10 @@ ; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[TMP6]], poison ; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], poison ; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[TMP8]], poison -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 -; CHECK-NEXT: [[VEC1:%.*]] = insertelement <2 x float> undef, float [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP9]], i32 1 -; CHECK-NEXT: [[VEC2:%.*]] = insertelement <2 x float> [[VEC1]], float [[TMP11]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i32 2 -; CHECK-NEXT: [[VEC3:%.*]] = insertelement <2 x float> undef, float [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP9]], i32 3 -; CHECK-NEXT: [[VEC4:%.*]] = insertelement <2 x float> [[VEC3]], float [[TMP13]], i32 1 -; CHECK-NEXT: [[INS1:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VEC2]], 0 -; CHECK-NEXT: [[INS2:%.*]] = insertvalue { <2 x float>, <2 x float> } [[INS1]], <2 x float> [[VEC4]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[INS1:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP10]], 0 +; CHECK-NEXT: [[INS2:%.*]] = insertvalue { <2 x float>, <2 x float> } [[INS1]], <2 x float> [[TMP11]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[INS2]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll @@ -9,10 +9,9 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> , i32 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP0]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 ; CHECK-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll @@ -58,18 +58,11 @@ ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP7]], i32 2 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP9]], i32 3 -; CHECK-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[TMP2]], [[TMP10]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: ret i32 undef ; %in.addr = getelementptr inbounds i32, i32* %in, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll @@ -39,14 +39,13 @@ ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 1 ; CHECK-NEXT: store float [[TMP15]], float* @f, align 4 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> , float [[CONV19]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[SHUFFLE]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP16]], float [[TMP17]], i32 2 -; CHECK-NEXT: [[TMP19:%.*]] = fsub <4 x float> [[TMP11]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = fadd <4 x float> [[TMP11]], [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> [[TMP20]], <4 x i32> -; CHECK-NEXT: [[TMP22:%.*]] = fptosi <4 x float> [[TMP21]] to <4 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x float> [[TMP16]], <4 x float> [[SHUFFLE]], <4 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = fsub <4 x float> [[TMP11]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = fadd <4 x float> [[TMP11]], [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> [[TMP19]], <4 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = fptosi <4 x float> [[TMP20]] to <4 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP21]], <4 x i32>* [[TMP22]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll @@ -59,9 +59,8 @@ ; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> poison, float [[X0]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[I21:%.*]] = shufflevector <4 x float> [[I0]], <4 x float> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I21]], float [[TMP4]], i32 3 -; CHECK-NEXT: ret <4 x float> [[I3]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[I21]], <4 x float> [[TMP3]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP4]] ; %gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0 %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll @@ -59,9 +59,8 @@ ; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X0]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[I21:%.*]] = shufflevector <4 x float> [[I0]], <4 x float> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I21]], float [[TMP4]], i32 3 -; CHECK-NEXT: ret <4 x float> [[I3]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[I21]], <4 x float> [[TMP3]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP4]] ; %gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0 %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll @@ -722,15 +722,11 @@ ; SSE-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 ; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> ; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] -; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 1 -; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 -; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 0 -; SSE-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1 -; SSE-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP1]], [[TMP8]] -; SSE-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP4]], [[TMP9]] -; SSE-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP10]], i32 0 -; SSE-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP10]], i32 1 -; SSE-NEXT: [[ADD3:%.*]] = fadd double [[TMP11]], [[TMP12]] +; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] +; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 +; SSE-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 +; SSE-NEXT: [[ADD3:%.*]] = fadd double [[TMP7]], [[TMP8]] ; SSE-NEXT: ret double [[ADD3]] ; ; AVX-LABEL: @splat_loads( @@ -791,17 +787,13 @@ ; SSE-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 ; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> ; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] -; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 1 -; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 -; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 0 -; SSE-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1 -; SSE-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP1]], [[TMP8]] -; SSE-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP4]], [[TMP9]] -; SSE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP5]], i32 1 -; SSE-NEXT: [[TMP12:%.*]] = fsub <2 x double> [[TMP10]], [[TMP11]] -; SSE-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[TMP12]], i32 0 -; SSE-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[TMP12]], i32 1 -; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP13]], [[TMP14]] +; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] +; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer +; SSE-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP6]], [[TMP7]] +; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0 +; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1 +; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP9]], [[TMP10]] ; SSE-NEXT: ret double [[RES]] ; ; AVX-LABEL: @splat_loads_with_internal_uses( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll @@ -29,10 +29,9 @@ ; CHECK: if.then: ; CHECK-NEXT: [[AND0_TMP:%.*]] = and i64 [[TMP8]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[AND0_TMP]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP10]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i64> [[TMP11]], [[TMP7]] -; CHECK-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP9]], <2 x i64> [[TMP6]], <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = and <2 x i64> [[TMP10]], [[TMP7]] +; CHECK-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll @@ -151,7 +151,7 @@ ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[R_052:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[TMP2]], [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[TMP2]], [[ENTRY]] ], [ [[TMP16:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x float> [ [[TMP5]], [[ENTRY]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0 ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP8]], 7.000000e+00 @@ -163,25 +163,22 @@ ; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[ARRAYIDX19]] to <2 x float>* ; CHECK-NEXT: [[TMP12]] = load <2 x float>, <2 x float>* [[TMP11]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> poison, float [[TMP13]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP10]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x float> [[TMP15]], <4 x float> [[TMP16]], <4 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = fmul <4 x float> [[TMP17]], -; CHECK-NEXT: [[TMP19]] = fadd <4 x float> [[TMP6]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP20]], 121 +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> [[TMP12]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = fmul <4 x float> [[TMP14]], +; CHECK-NEXT: [[TMP16]] = fadd <4 x float> [[TMP6]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP17]], 121 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP19]], i32 0 -; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP19]], i32 1 -; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP22]] -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[TMP19]], i32 2 -; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP23]] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x float> [[TMP19]], i32 3 -; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP24]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP16]], i32 0 +; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[TMP16]], i32 1 +; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP16]], i32 2 +; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP16]], i32 3 +; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP21]] ; CHECK-NEXT: ret float [[ADD31]] ; entry: @@ -244,27 +241,20 @@ ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP5]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP9]] = fmul <4 x float> [[TMP8]], +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP2:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2]] = fmul <4 x float> [[TMP1]], ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], 128 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP9]], i32 1 -; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[TMP10]], [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i32 2 -; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP9]], i32 3 -; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP13]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 +; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 +; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 +; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP6]] ; CHECK-NEXT: ret float [[ADD31]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll @@ -10,16 +10,10 @@ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0 -; CHECK-NEXT: [[VECIN0:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1 -; CHECK-NEXT: [[VECIN1:%.*]] = insertelement <2 x float> [[VECIN0]], float [[TMP5]], i64 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP3]], i32 2 -; CHECK-NEXT: [[VECIN2:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP3]], i32 3 -; CHECK-NEXT: [[VECIN3:%.*]] = insertelement <2 x float> [[VECIN2]], float [[TMP7]], i64 1 -; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VECIN1]], 0 -; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[VECIN3]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP4]], 0 +; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP5]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[RET1]] ; %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll @@ -10,16 +10,10 @@ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0 -; CHECK-NEXT: [[VECIN0:%.*]] = insertelement <2 x float> undef, float [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1 -; CHECK-NEXT: [[VECIN1:%.*]] = insertelement <2 x float> [[VECIN0]], float [[TMP5]], i64 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP3]], i32 2 -; CHECK-NEXT: [[VECIN2:%.*]] = insertelement <2 x float> undef, float [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP3]], i32 3 -; CHECK-NEXT: [[VECIN3:%.*]] = insertelement <2 x float> [[VECIN2]], float [[TMP7]], i64 1 -; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VECIN1]], 0 -; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[VECIN3]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP4]], 0 +; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP5]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[RET1]] ; %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll @@ -10,18 +10,10 @@ ; CHECK: bb3: ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ poison, [[BB2:%.*]] ], [ zeroinitializer, [[BB1:%.*]] ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP1]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP1]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP1]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP1]], i32 5 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP1]], i32 6 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP1]], i32 7 -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[SHUFFLE]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[SHUFFLE]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP2]], [[TMP3]] ; CHECK-NEXT: [[TMP65:%.*]] = sext i32 [[OP_RDX]] to i64 ; CHECK-NEXT: ret i64 [[TMP65]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll @@ -9,12 +9,10 @@ ; CHECK-NEXT: br label [[T:%.*]] ; CHECK: t: ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i16> [[LD]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[P0]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* [[TMP3]], align 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP0]], <8 x i16>* [[TMP1]], align 2 ; CHECK-NEXT: ret void ; ; YAML: Pass: slp-vectorizer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll @@ -12,22 +12,15 @@ ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[SHUFFLE]] ; CHECK-NEXT: [[TMP7:%.*]] = shl nsw <4 x i32> [[TMP6]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP11]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP13]], i32 2 -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP15]], i32 3 -; CHECK-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = sub nsw <4 x i32> [[TMP8]], [[TMP16]] -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP19]] -; CHECK-NEXT: [[TMP21:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP19]] -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 16 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = sub nsw <4 x i32> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 16 ; CHECK-NEXT: ret void ; %1 = getelementptr inbounds i8, i8* undef, i64 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll b/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll @@ -7,21 +7,18 @@ ; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[ISEC:%.*]], i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX10]] to <2 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: br i1 false, label [[BLOCK1:%.*]], label [[BLOCK3:%.*]] ; CHECK: block1: ; CHECK-NEXT: br i1 false, label [[BLOCK2:%.*]], label [[BLOCK3]] ; CHECK: block2: ; CHECK-NEXT: br label [[BLOCK3]] ; CHECK: block3: -; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP1]], [[BLOCK1]] ], [ [[TMP1]], [[BLOCK2]] ], [ [[TMP5]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], [[TMP7]] -; CHECK-NEXT: ret i32 [[TMP9]] +; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ [[TMP1]], [[BLOCK1]] ], [ [[TMP1]], [[BLOCK2]] ], [ [[TMP2]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP4]] +; CHECK-NEXT: ret i32 [[TMP6]] ; entry: %arrayidx10 = getelementptr inbounds i32, i32* %isec, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll @@ -49,15 +49,9 @@ ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP10]], i32 0 -; CHECK-NEXT: [[T69:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP12]], i32 4 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP10]], i32 1 -; CHECK-NEXT: [[T70:%.*]] = insertelement <8 x i32> [[T69]], i32 [[TMP13]], i32 5 -; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T70]], i32 [[T34]], i32 6 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP10]], i32 3 -; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[TMP14]], i32 7 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[T34]], i32 6 +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], ; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>* ; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll @@ -49,15 +49,9 @@ ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP10]], i32 0 -; CHECK-NEXT: [[T69:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP12]], i32 4 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP10]], i32 1 -; CHECK-NEXT: [[T70:%.*]] = insertelement <8 x i32> [[T69]], i32 [[TMP13]], i32 5 -; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T70]], i32 [[T34]], i32 6 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP10]], i32 3 -; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[TMP14]], i32 7 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[T34]], i32 6 +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], ; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>* ; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll @@ -12,7 +12,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1 ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP18:%.*]], [[BB3:%.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP14:%.*]], [[BB3:%.*]] ] ; CHECK-NEXT: [[TMP3:%.*]] = load double, double* undef, align 8 ; CHECK-NEXT: br i1 undef, label [[BB3]], label [[BB4:%.*]] ; CHECK: bb4: @@ -23,17 +23,13 @@ ; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x double> poison, double [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x double> [[TMP11]], double [[TMP12]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = fcmp ogt <4 x double> [[TMP13]], [[TMP4]] -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = fptrunc <4 x double> [[TMP15]] to <4 x float> -; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP14]], <4 x float> [[TMP2]], <4 x float> [[TMP16]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <4 x double> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float> +; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[TMP2]], <4 x float> [[TMP12]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP18]] = phi <4 x float> [ [[TMP17]], [[BB4]] ], [ [[TMP2]], [[BB2]] ] +; CHECK-NEXT: [[TMP14]] = phi <4 x float> [ [[TMP13]], [[BB4]] ], [ [[TMP2]], [[BB2]] ] ; CHECK-NEXT: br label [[BB2]] ; entry: