diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5603,6 +5603,126 @@ return false; } +/// Checks if the \p IE1 instructions is followed by \p IE2 instruction in the +/// buildvector sequence. +static bool isFirstInsertElement(const InsertElementInst *IE1, + const InsertElementInst *IE2) { + const auto *I1 = IE1; + const auto *I2 = IE2; + do { + if (I2 == IE1) + return true; + if (I1 == IE2) + return false; + if (I1) + I1 = dyn_cast(I1->getOperand(0)); + if (I2) + I2 = dyn_cast(I2->getOperand(0)); + } while (I1 || I2); + llvm_unreachable("Two different buildvectors not expected."); +} + +namespace { +/// Returns incoming Value *, if the requested type is Value * too, or a default +/// value, otherwise. +struct ValueSelect { + template + static typename std::enable_if::value, Value *>::type + get(Value *V) { + return V; + } + template + static typename std::enable_if::value, U>::type + get(Value *) { + return U(); + } +}; +} // namespace + +template +static T *performExtractsShuffleAction( + MutableArrayRef>> ShuffleMask, Value *Base, + function_ref GetVF, + function_ref(T *, ArrayRef)> ResizeAction, + function_ref, ArrayRef)> Action) { + assert(!ShuffleMask.empty() && "Empty list of shuffles for inserts."); + SmallVector Mask(ShuffleMask.begin()->second); + auto VMIt = std::next(ShuffleMask.begin()); + T *Prev = nullptr; + bool IsBaseNotUndef = !isUndefVector(Base); + if (IsBaseNotUndef) { + // Base is not undef, need to combine it with the next subvectors. + std::pair Res = ResizeAction(ShuffleMask.begin()->first, Mask); + for (unsigned Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) { + if (Mask[Idx] == UndefMaskElem) + Mask[Idx] = Idx; + else + Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF; + } + auto *V = ValueSelect::get(Base); + assert((!V || GetVF(V) == Mask.size()) && + "Expected base vector of VF number of elements."); + Prev = Action(Mask, {V, Res.first}); + } else if (ShuffleMask.size() == 1) { + // Base is undef and only 1 vector is shuffled. + std::pair Res = ResizeAction(ShuffleMask.begin()->first, Mask); + if (Res.second) + Prev = Res.first; + else + Prev = Action(Mask, {ShuffleMask.begin()->first}); + } else { + // Base is undef and at least 2 input vectors shuffled. + unsigned Vec1VF = GetVF(ShuffleMask.begin()->first); + unsigned Vec2VF = GetVF(VMIt->first); + if (Vec1VF == Vec2VF) { + // No need to resize the input vectors since they are of the same size, we + // can shuffle them directly. + ArrayRef SecMask = VMIt->second; + for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) { + if (SecMask[I] != UndefMaskElem) { + assert(Mask[I] == UndefMaskElem && "Multiple uses of scalars."); + Mask[I] = SecMask[I] + Vec1VF; + } + } + Prev = Action(Mask, {ShuffleMask.begin()->first, VMIt->first}); + } else { + // Vectors of different sizes - resize and reshuffle. + std::pair Res1 = + ResizeAction(ShuffleMask.begin()->first, Mask); + std::pair Res2 = ResizeAction(VMIt->first, VMIt->second); + ArrayRef SecMask = VMIt->second; + for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) { + if (Mask[I] != UndefMaskElem) { + assert(SecMask[I] == UndefMaskElem && "Multiple uses of scalars."); + if (Res1.second) + Mask[I] = I; + } else if (SecMask[I] != UndefMaskElem) { + assert(Mask[I] == UndefMaskElem && "Multiple uses of scalars."); + Mask[I] = (Res2.second ? I : SecMask[I]) + VF; + } + } + Prev = Action(Mask, {Res1.first, Res2.first}); + } + VMIt = std::next(VMIt); + } + for (auto E = ShuffleMask.end(); VMIt != E; ++VMIt) { + // Shuffle other input vectors, if any. + std::pair Res = ResizeAction(VMIt->first, VMIt->second); + ArrayRef SecMask = VMIt->second; + for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) { + if (SecMask[I] != UndefMaskElem) { + assert((Mask[I] == UndefMaskElem || IsBaseNotUndef) && + "Multiple uses of scalars."); + Mask[I] = (Res.second ? I : SecMask[I]) + VF; + } else if (Mask[I] != UndefMaskElem) { + Mask[I] = I; + } + } + Prev = Action(Mask, {Prev, Res.first}); + } + return Prev; +} + InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { InstructionCost Cost = 0; LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size " @@ -5623,9 +5743,8 @@ SmallPtrSet ExtractCostCalculated; InstructionCost ExtractCost = 0; - SmallVector VF; - SmallVector> ShuffleMask; - SmallVector FirstUsers; + SmallVector>> ShuffleMasks; + SmallVector> FirstUsers; SmallVector DemandedElts; for (ExternalUser &EU : ExternalUses) { // We only add extract cost once for the same scalar. @@ -5655,37 +5774,49 @@ Optional InsertIdx = getInsertIndex(VU, 0); if (!InsertIdx || *InsertIdx == UndefMaskElem) continue; - auto *It = find_if(FirstUsers, [VU](Value *V) { - return areTwoInsertFromSameBuildVector(VU, - cast(V)); - }); + const TreeEntry *ScalarTE = getTreeEntry(EU.Scalar); + auto *It = + find_if(FirstUsers, + [VU](const std::pair &Pair) { + return areTwoInsertFromSameBuildVector( + VU, cast(Pair.first)); + }); int VecId = -1; if (It == FirstUsers.end()) { - VF.push_back(FTy->getNumElements()); - ShuffleMask.emplace_back(VF.back(), UndefMaskElem); + (void)ShuffleMasks.emplace_back(); // Find the insertvector, vectorized in tree, if any. Value *Base = VU; - while (isa(Base)) { + while (auto *IEBase = dyn_cast(Base)) { // Build the mask for the vectorized insertelement instructions. - if (const TreeEntry *E = getTreeEntry(Base)) { - VU = cast(Base); + if (const TreeEntry *E = getTreeEntry(IEBase)) { + VU = IEBase; do { - int Idx = E->findLaneForValue(Base); - ShuffleMask.back()[Idx] = Idx; + int Idx = E->findLaneForValue(IEBase); + SmallVectorImpl &Mask = ShuffleMasks.back()[ScalarTE]; + if (Mask.empty()) + Mask.assign(FTy->getNumElements(), UndefMaskElem); + Mask[Idx] = Idx; Base = cast(Base)->getOperand(0); } while (E == getTreeEntry(Base)); break; } Base = cast(Base)->getOperand(0); } - FirstUsers.push_back(VU); - DemandedElts.push_back(APInt::getZero(VF.back())); + FirstUsers.emplace_back(VU, ScalarTE); + DemandedElts.push_back(APInt::getZero(FTy->getNumElements())); VecId = FirstUsers.size() - 1; } else { + if (isFirstInsertElement(VU, cast(It->first))) + It->first = VU; VecId = std::distance(FirstUsers.begin(), It); } int Idx = *InsertIdx; - ShuffleMask[VecId][Idx] = EU.Lane; + SmallVectorImpl &Mask = ShuffleMasks[VecId][ScalarTE]; + if (Mask.empty()) + Mask.assign(FTy->getNumElements(), UndefMaskElem); + assert(Mask[Idx] == UndefMaskElem && + "InsertElementInstruction used already."); + Mask[Idx] = EU.Lane; DemandedElts[VecId].setBit(Idx); continue; } @@ -5711,86 +5842,75 @@ InstructionCost SpillCost = getSpillCost(); Cost += SpillCost + ExtractCost; - if (FirstUsers.size() == 1) { - int Limit = ShuffleMask.front().size() * 2; - if (all_of(ShuffleMask.front(), [Limit](int Idx) { return Idx < Limit; }) && - !ShuffleVectorInst::isIdentityMask(ShuffleMask.front())) { - InstructionCost C = TTI->getShuffleCost( + auto &&ResizeToVF = [this, &Cost](const TreeEntry *TE, ArrayRef Mask) { + InstructionCost C = 0; + unsigned VF = Mask.size(); + unsigned VecVF = TE->getVectorFactor(); + if (VF != VecVF && + (any_of(Mask, [VF](int Idx) { return Idx >= static_cast(VF); }) || + (all_of(Mask, + [VF](int Idx) { return Idx < 2 * static_cast(VF); }) && + !ShuffleVectorInst::isIdentityMask(Mask)))) { + SmallVector OrigMask(VecVF, UndefMaskElem); + std::copy(Mask.begin(), std::next(Mask.begin(), std::min(VF, VecVF)), + OrigMask.begin()); + C = TTI->getShuffleCost( TTI::SK_PermuteSingleSrc, - cast(FirstUsers.front()->getType()), - ShuffleMask.front()); - LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C - << " for final shuffle of insertelement external users " - << *VectorizableTree.front()->Scalars.front() << ".\n" - << "SLP: Current total cost = " << Cost << "\n"); + FixedVectorType::get(TE->getMainOp()->getType(), VecVF), OrigMask); + LLVM_DEBUG( + dbgs() << "SLP: Adding cost " << C + << " for final shuffle of insertelement external users.\n"; + TE->dump(); dbgs() << "SLP: Current total cost = " << Cost << "\n"); Cost += C; + return std::make_pair(TE, true); } + return std::make_pair(TE, false); + }; + // Calculate the cost of the reshuffled vectors, if any. + for (int I = 0, E = FirstUsers.size(); I < E; ++I) { + Value *Base = cast(FirstUsers[I].first)->getOperand(0); + unsigned VF = ShuffleMasks[I].begin()->second.size(); + auto *FTy = FixedVectorType::get( + cast(FirstUsers[I].first->getType())->getElementType(), VF); + auto Vector = ShuffleMasks[I].takeVector(); + (void)performExtractsShuffleAction( + makeMutableArrayRef(Vector.data(), Vector.size()), Base, + [](const TreeEntry *E) { return E->getVectorFactor(); }, ResizeToVF, + [this, FTy, &Cost](ArrayRef Mask, + ArrayRef TEs) { + assert((TEs.size() == 1 || TEs.size() == 2) && + "Expected exactly 1 or 2 tree entries."); + if (TEs.size() == 1) { + int Limit = 2 * Mask.size(); + if (!all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) || + !ShuffleVectorInst::isIdentityMask(Mask)) { + InstructionCost C = + TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FTy, Mask); + LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C + << " for final shuffle of insertelement " + "external users.\n"; + TEs.front()->dump(); + dbgs() + << "SLP: Current total cost = " << Cost << "\n"); + Cost += C; + } + } else { + InstructionCost C = + TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, FTy, Mask); + LLVM_DEBUG(dbgs() + << "SLP: Adding cost " << C + << " for final shuffle of vector node and external " + "insertelement users.\n"; + if (TEs.front()) TEs.front()->dump(); TEs.back()->dump(); + dbgs() << "SLP: Current total cost = " << Cost << "\n"); + Cost += C; + } + return TEs.back(); + }); InstructionCost InsertCost = TTI->getScalarizationOverhead( - cast(FirstUsers.front()->getType()), - DemandedElts.front(), /*Insert*/ true, /*Extract*/ false); - LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost - << " for insertelements gather.\n" - << "SLP: Current total cost = " << Cost << "\n"); - Cost -= InsertCost; - } else if (FirstUsers.size() >= 2) { - unsigned MaxVF = *std::max_element(VF.begin(), VF.end()); - // Combined masks of the first 2 vectors. - SmallVector CombinedMask(MaxVF, UndefMaskElem); - copy(ShuffleMask.front(), CombinedMask.begin()); - APInt CombinedDemandedElts = DemandedElts.front().zextOrSelf(MaxVF); - auto *VecTy = FixedVectorType::get( - cast(FirstUsers.front()->getType())->getElementType(), - MaxVF); - for (int I = 0, E = ShuffleMask[1].size(); I < E; ++I) { - if (ShuffleMask[1][I] != UndefMaskElem) { - CombinedMask[I] = ShuffleMask[1][I] + MaxVF; - CombinedDemandedElts.setBit(I); - } - } - InstructionCost C = - TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask); - LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C - << " for final shuffle of vector node and external " - "insertelement users " - << *VectorizableTree.front()->Scalars.front() << ".\n" - << "SLP: Current total cost = " << Cost << "\n"); - Cost += C; - InstructionCost InsertCost = TTI->getScalarizationOverhead( - VecTy, CombinedDemandedElts, /*Insert*/ true, /*Extract*/ false); - LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost - << " for insertelements gather.\n" - << "SLP: Current total cost = " << Cost << "\n"); + cast(FirstUsers[I].first->getType()), DemandedElts[I], + /*Insert*/ true, /*Extract*/ false); Cost -= InsertCost; - for (int I = 2, E = FirstUsers.size(); I < E; ++I) { - // Other elements - permutation of 2 vectors (the initial one and the - // next Ith incoming vector). - unsigned VF = ShuffleMask[I].size(); - for (unsigned Idx = 0; Idx < VF; ++Idx) { - int Mask = ShuffleMask[I][Idx]; - if (Mask != UndefMaskElem) - CombinedMask[Idx] = MaxVF + Mask; - else if (CombinedMask[Idx] != UndefMaskElem) - CombinedMask[Idx] = Idx; - } - for (unsigned Idx = VF; Idx < MaxVF; ++Idx) - if (CombinedMask[Idx] != UndefMaskElem) - CombinedMask[Idx] = Idx; - InstructionCost C = - TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask); - LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C - << " for final shuffle of vector node and external " - "insertelement users " - << *VectorizableTree.front()->Scalars.front() << ".\n" - << "SLP: Current total cost = " << Cost << "\n"); - Cost += C; - InstructionCost InsertCost = TTI->getScalarizationOverhead( - cast(FirstUsers[I]->getType()), DemandedElts[I], - /*Insert*/ true, /*Extract*/ false); - LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost - << " for insertelements gather.\n" - << "SLP: Current total cost = " << Cost << "\n"); - Cost -= InsertCost; - } } #ifndef NDEBUG @@ -6880,6 +7000,17 @@ return vectorizeTree(ExternallyUsedValues); } +namespace { +/// Data type for handling buildvector sequences with the reused scalars from +/// other tree entries. +struct ShuffledInsertData { + /// List of insertelements to be replaced by shuffles. + SmallVector InsertElements; + /// The parent vectors and shuffle mask for the given list of inserts. + MapVector> ValueMasks; +}; +} // namespace + Value * BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { // All blocks must be scheduled before any instructions are inserted. @@ -6913,6 +7044,9 @@ LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n"); + SmallVector ShuffledInserts; + // Maps vector instruction to original insertelement instruction + DenseMap VectorToInsertElement; // Extract all of the elements with the external uses. for (const auto &ExternalUse : ExternalUses) { Value *Scalar = ExternalUse.Scalar; @@ -6952,6 +7086,8 @@ assert(isa(Scalar->getType()) && isa(Scalar) && "In-tree scalar of vector type is not insertelement?"); + auto *IE = cast(Scalar); + VectorToInsertElement.try_emplace(Vec, IE); return Vec; }; // If User == nullptr, the Scalar is used as extra arg. Generate @@ -6980,6 +7116,62 @@ continue; } + if (auto *IE = dyn_cast(User)) { + if (!Scalar->getType()->isVectorTy()) { + if (auto *FTy = dyn_cast(User->getType())) { + Optional InsertIdx = getInsertIndex(IE, 0); + if (!InsertIdx || *InsertIdx == UndefMaskElem) + continue; + auto *It = + find_if(ShuffledInserts, [IE](const ShuffledInsertData &Data) { + // Checks if 2 insertelements are from the same buildvector. + InsertElementInst *VecInsert = Data.InsertElements.front(); + return areTwoInsertFromSameBuildVector(IE, VecInsert); + }); + int Idx = *InsertIdx; + if (It == ShuffledInserts.end()) { + (void)ShuffledInserts.emplace_back(); + It = std::next(ShuffledInserts.begin(), ShuffledInserts.size() - 1); + // Find the insertvector, vectorized in tree, if any. + Value *Base = IE; + InsertElementInst *PrevBase = IE; + while (auto *IEBase = dyn_cast(Base)) { + // Build the mask for the vectorized insertelement instructions. + if (const TreeEntry *E = getTreeEntry(IEBase)) { + assert(IEBase != PrevBase && "Unexpected in-tree user."); + IE = PrevBase; + do { + int Idx = E->findLaneForValue(Base); + SmallVectorImpl &Mask = It->ValueMasks[Vec]; + if (Mask.empty()) + Mask.assign(FTy->getNumElements(), UndefMaskElem); + Mask[Idx] = Idx; + Base = cast(Base)->getOperand(0); + } while (E == getTreeEntry(Base)); + break; + } + PrevBase = cast(Base); + Base = PrevBase->getOperand(0); + // After the vectorization the def-use chain has changed, need to + // look through original insertelement instructions, if they get + // replaced by vector instructions. + auto It = VectorToInsertElement.find(Base); + if (It != VectorToInsertElement.end()) + Base = It->second; + } + } + SmallVectorImpl &Mask = It->ValueMasks[Vec]; + if (Mask.empty()) + Mask.assign(FTy->getNumElements(), UndefMaskElem); + assert(Mask[Idx] == UndefMaskElem && + "InsertElementInstruction used already."); + Mask[Idx] = ExternalUse.Lane; + It->InsertElements.push_back(IE); + continue; + } + } + } + // Generate extracts for out-of-tree users. // Find the insertion point for the extractelement lane. if (auto *VecI = dyn_cast(Vec)) { @@ -7015,6 +7207,85 @@ LLVM_DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n"); } + auto &&ResizeToVF = [this](Value *Vec, ArrayRef Mask) { + unsigned VF = Mask.size(); + unsigned VecVF = cast(Vec->getType())->getNumElements(); + if (VF != VecVF) { + if (any_of(Mask, [VF](int Idx) { return Idx >= static_cast(VF); })) { + Vec = Builder.CreateShuffleVector(Vec, Mask); + return std::make_pair(Vec, true); + } + SmallVector ResizeMask(VF, UndefMaskElem); + for (unsigned I = 0; I < VF; ++I) { + if (Mask[I] != UndefMaskElem) + ResizeMask[Mask[I]] = Mask[I]; + } + Vec = Builder.CreateShuffleVector(Vec, ResizeMask); + } + + return std::make_pair(Vec, false); + }; + // Perform shuffling of the vectorize tree entries for better handling of + // external extracts. + for (int I = 0, E = ShuffledInserts.size(); I < E; ++I) { + // Find the first and the last instruction in the list of insertelements. + sort(ShuffledInserts[I].InsertElements, isFirstInsertElement); + InsertElementInst *FirstInsert = ShuffledInserts[I].InsertElements.front(); + InsertElementInst *LastInsert = ShuffledInserts[I].InsertElements.back(); + Builder.SetInsertPoint(LastInsert); + auto Vector = ShuffledInserts[I].ValueMasks.takeVector(); + Value *NewInst = performExtractsShuffleAction( + makeMutableArrayRef(Vector.data(), Vector.size()), + FirstInsert->getOperand(0), + [](Value *Vec) { + return cast(Vec->getType()) + ->getElementCount() + .getKnownMinValue(); + }, + ResizeToVF, + [this](ArrayRef Mask, ArrayRef Vals) { + assert((Vals.size() == 1 || Vals.size() == 2) && + "Expected exactly 1 or 2 input values."); + if (Vals.size() == 1) { + // Do not create shuffle if the mask is a simple identity + // non-resizing mask. + if (Mask.size() != cast(Vals.front()->getType()) + ->getNumElements() || + !ShuffleVectorInst::isIdentityMask(Mask)) + return Builder.CreateShuffleVector(Vals.front(), Mask); + return Vals.front(); + } + return Builder.CreateShuffleVector(Vals.front(), Vals.back(), Mask); + }); + auto It = ShuffledInserts[I].InsertElements.rbegin(); + // Rebuild buildvector chain. + InsertElementInst *II = nullptr; + if (It != ShuffledInserts[I].InsertElements.rend()) + II = *It; + SmallVector Inserts; + while (It != ShuffledInserts[I].InsertElements.rend()) { + assert(II && "Must be an insertelement instruction."); + if (*It == II) + ++It; + else + Inserts.push_back(cast(II)); + II = dyn_cast(II->getOperand(0)); + } + for (Instruction *II : reverse(Inserts)) { + II->replaceUsesOfWith(II->getOperand(0), NewInst); + if (auto *I = dyn_cast(NewInst)) + II->moveAfter(I); + NewInst = II; + } + for (InsertElementInst *IE : reverse(ShuffledInserts[I].InsertElements)) { + IE->replaceUsesOfWith(IE->getOperand(1), + PoisonValue::get(IE->getOperand(1)->getType())); + eraseInstruction(IE); + } + LastInsert->replaceAllUsesWith(NewInst); + CSEBlocks.insert(LastInsert->getParent()); + } + // For each vectorized value: for (auto &TEPtr : VectorizableTree) { TreeEntry *Entry = TEPtr.get(); diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll @@ -139,7 +139,7 @@ ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_0]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i64 0 ; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> undef, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP5]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll @@ -139,7 +139,7 @@ ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_0]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i64 0 ; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> undef, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP5]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute-inseltpoison.ll @@ -248,12 +248,12 @@ ; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[B3]], i64 0 ; SSE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B0]], i64 1 ; SSE-NEXT: [[TMP8:%.*]] = fcmp ord <2 x float> [[TMP5]], [[TMP7]] -; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> poison, <4 x i32> -; SSE-NEXT: [[D0:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> undef, <4 x i32> +; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> poison, <4 x i32> ; SSE-NEXT: [[TMP10:%.*]] = shufflevector <2 x i1> [[TMP4]], <2 x i1> poison, <4 x i32> -; SSE-NEXT: [[D21:%.*]] = shufflevector <4 x i1> [[D0]], <4 x i1> [[TMP10]], <4 x i32> -; SSE-NEXT: [[D3:%.*]] = shufflevector <4 x i1> [[D21]], <4 x i1> [[TMP9]], <4 x i32> -; SSE-NEXT: [[R:%.*]] = sext <4 x i1> [[D3]] to <4 x i32> +; SSE-NEXT: [[D21:%.*]] = shufflevector <4 x i1> [[TMP9]], <4 x i1> [[TMP10]], <4 x i32> +; SSE-NEXT: [[TMP11:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> poison, <4 x i32> +; SSE-NEXT: [[TMP12:%.*]] = shufflevector <4 x i1> [[D21]], <4 x i1> [[TMP11]], <4 x i32> +; SSE-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i32> ; SSE-NEXT: ret <4 x i32> [[R]] ; ; AVX-LABEL: @fcmp_ord_uno_v4i32( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute.ll @@ -248,12 +248,12 @@ ; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[B3]], i64 0 ; SSE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B0]], i64 1 ; SSE-NEXT: [[TMP8:%.*]] = fcmp ord <2 x float> [[TMP5]], [[TMP7]] -; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> poison, <4 x i32> -; SSE-NEXT: [[D0:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> undef, <4 x i32> +; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> poison, <4 x i32> ; SSE-NEXT: [[TMP10:%.*]] = shufflevector <2 x i1> [[TMP4]], <2 x i1> poison, <4 x i32> -; SSE-NEXT: [[D21:%.*]] = shufflevector <4 x i1> [[D0]], <4 x i1> [[TMP10]], <4 x i32> -; SSE-NEXT: [[D3:%.*]] = shufflevector <4 x i1> [[D21]], <4 x i1> [[TMP9]], <4 x i32> -; SSE-NEXT: [[R:%.*]] = sext <4 x i1> [[D3]] to <4 x i32> +; SSE-NEXT: [[D21:%.*]] = shufflevector <4 x i1> [[TMP9]], <4 x i1> [[TMP10]], <4 x i32> +; SSE-NEXT: [[TMP11:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> poison, <4 x i32> +; SSE-NEXT: [[TMP12:%.*]] = shufflevector <4 x i1> [[D21]], <4 x i1> [[TMP11]], <4 x i32> +; SSE-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i32> ; SSE-NEXT: ret <4 x i32> [[R]] ; ; AVX-LABEL: @fcmp_ord_uno_v4i32( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll @@ -75,7 +75,7 @@ ; AVX-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP11]], i32 0 ; AVX-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP11]], i32 1 ; AVX-NEXT: [[ADD13]] = fadd float [[TMP12]], [[TMP13]] -; AVX-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0 +; AVX-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <2 x i32> ; AVX-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[ADD13]], i32 1 ; AVX-NEXT: [[TMP16:%.*]] = fcmp olt <2 x float> [[TMP15]], ; AVX-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x float> [[TMP15]], <2 x float> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll @@ -35,7 +35,7 @@ ; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP6]], <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[TMP14]], undef ; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [ ; CHECK-NEXT: i32 0, label [[BB2:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll @@ -129,11 +129,10 @@ ; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds double, double* [[INBUF:%.*]], i64 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[INBUF]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll @@ -11,25 +11,24 @@ ; CHECK-NEXT: [[TAB2:%.*]] = alloca [256 x i32], align 16 ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[MUL19:%.*]] = fmul double [[P1:%.*]], 1.638400e+04 ; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04 ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03 -; CHECK-NEXT: [[MUL21:%.*]] = fmul double [[P2:%.*]], 1.638400e+04 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P1:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P2:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[ADD]], i32 1 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[T_0259:%.*]] = phi double [ 0.000000e+00, [[BB1]] ], [ [[ADD27:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[P3_ADDR_0258:%.*]] = phi double [ [[ADD]], [[BB1]] ], [ [[ADD28:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> poison, double [[T_0259]], i32 0 -; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]]) +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]] -; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, [[TBAA0:!tbaa !.*]] -; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> poison, double [[P3_ADDR_0258]], i32 0 -; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]]) +; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]] -; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, [[TBAA0]] -; CHECK-NEXT: [[ADD27]] = fadd double [[MUL19]], [[T_0259]] -; CHECK-NEXT: [[ADD28]] = fadd double [[MUL21]], [[P3_ADDR_0258]] +; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll @@ -11,25 +11,24 @@ ; CHECK-NEXT: [[TAB2:%.*]] = alloca [256 x i32], align 16 ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[MUL19:%.*]] = fmul double [[P1:%.*]], 1.638400e+04 ; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04 ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03 -; CHECK-NEXT: [[MUL21:%.*]] = fmul double [[P2:%.*]], 1.638400e+04 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P1:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P2:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[ADD]], i32 1 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[T_0259:%.*]] = phi double [ 0.000000e+00, [[BB1]] ], [ [[ADD27:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[P3_ADDR_0258:%.*]] = phi double [ [[ADD]], [[BB1]] ], [ [[ADD28:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> undef, double [[T_0259]], i32 0 -; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]]) +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]] -; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa !0 -; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> undef, double [[P3_ADDR_0258]], i32 0 -; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]]) +; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]] -; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa !0 -; CHECK-NEXT: [[ADD27]] = fadd double [[MUL19]], [[T_0259]] -; CHECK-NEXT: [[ADD28]] = fadd double [[MUL21]], [[P3_ADDR_0258]] +; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll @@ -114,17 +114,16 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 undef, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]] ; CHECK: if.then38: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> , double undef, i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> undef, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> undef, [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = fmul <2 x double> undef, undef +; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> undef, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> undef, [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> undef, [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> undef, [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> undef, [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> undef, [[TMP6]] +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> undef, [[TMP5]] ; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY_5_11_53_95_137_191_197_203_239_257_263_269_275_281_287_293_383_437_443_455_461_599_601:%.*]], %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8 ; CHECK-NEXT: br label [[RETURN:%.*]] ; CHECK: if.then78: ; CHECK-NEXT: br label [[RETURN]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll @@ -82,23 +82,19 @@ ; CHECK-NEXT: ret float [[ADD]] ; ; THRESH1-LABEL: @f_used_twice_in_tree( -; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 -; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; THRESH1-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 -; THRESH1-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]] -; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 -; THRESH1-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 -; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]] +; THRESH1-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> +; THRESH1-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]] +; THRESH1-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; THRESH1-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] ; THRESH1-NEXT: ret float [[ADD]] ; ; THRESH2-LABEL: @f_used_twice_in_tree( -; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 -; THRESH2-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; THRESH2-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 -; THRESH2-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]] -; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 -; THRESH2-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 -; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]] +; THRESH2-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> +; THRESH2-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]] +; THRESH2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] ; THRESH2-NEXT: ret float [[ADD]] ; %x0 = extractelement <2 x float> %x, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extracts-with-undefs.ll b/llvm/test/Transforms/SLPVectorizer/X86/extracts-with-undefs.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/extracts-with-undefs.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extracts-with-undefs.ll @@ -6,28 +6,26 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[BODY:%.*]] ; CHECK: body: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x double> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> , double [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 -; CHECK-NEXT: [[ADD8_I_I:%.*]] = fadd fast double [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[PHI1:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ 0.000000e+00, [[BODY]] ] +; CHECK-NEXT: [[PHI2:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ 0.000000e+00, [[BODY]] ] +; CHECK-NEXT: [[MUL_I478_I:%.*]] = fmul fast double [[PHI1]], 0.000000e+00 +; CHECK-NEXT: [[MUL7_I485_I:%.*]] = fmul fast double undef, 0.000000e+00 +; CHECK-NEXT: [[ADD8_I_I:%.*]] = fadd fast double [[MUL_I478_I]], [[MUL7_I485_I]] ; CHECK-NEXT: [[CMP42_I:%.*]] = fcmp fast ole double [[ADD8_I_I]], 0.000000e+00 ; CHECK-NEXT: br i1 false, label [[BODY]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: br i1 false, label [[IF_THEN135_I:%.*]], label [[IF_END209_I:%.*]] ; CHECK: if.then135.i: -; CHECK-NEXT: [[TMP6:%.*]] = fcmp fast olt <2 x double> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i1> , i1 [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP8]], <2 x double> zeroinitializer, <2 x double> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <2 x double> zeroinitializer, [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <2 x double> [[TMP10]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = fadd fast <2 x double> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[CMP145_I:%.*]] = fcmp fast olt double [[PHI1]], 0.000000e+00 +; CHECK-NEXT: [[CMP152_I:%.*]] = fcmp fast olt double [[PHI2]], 0.000000e+00 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i1> , i1 [[CMP152_I]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[TMP0]], <2 x double> zeroinitializer, <2 x double> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> zeroinitializer, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[TMP3]], zeroinitializer ; CHECK-NEXT: br label [[IF_END209_I]] ; CHECK: if.end209.i: -; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x double> [ [[TMP12]], [[IF_THEN135_I]] ], [ zeroinitializer, [[EXIT]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x double> [ [[TMP4]], [[IF_THEN135_I]] ], [ zeroinitializer, [[EXIT]] ] ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -1102,9 +1102,9 @@ ; THRESH-NEXT: [[TMP13:%.*]] = insertelement <2 x i1> poison, i1 [[TMP12]], i32 0 ; THRESH-NEXT: [[TMP14:%.*]] = insertelement <2 x i1> [[TMP13]], i1 [[TMP5]], i32 1 ; THRESH-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 -; THRESH-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP3]], i32 1 +; THRESH-NEXT: [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP15]], <2 x i32> [[TMP2]], <2 x i32> ; THRESH-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0 -; THRESH-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> [[TMP17]], i32 [[TMP4]], i32 1 +; THRESH-NEXT: [[TMP18:%.*]] = shufflevector <2 x i32> [[TMP17]], <2 x i32> [[TMP2]], <2 x i32> ; THRESH-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP14]], <2 x i32> [[TMP16]], <2 x i32> [[TMP18]] ; THRESH-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0 ; THRESH-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll @@ -441,24 +441,12 @@ ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @reschedule_extract( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B]], i32 2 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B]], i32 1 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[B]], i32 0 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[A]], i32 2 -; MINTREESIZE-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[A]], i32 1 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0 -; MINTREESIZE-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0 -; MINTREESIZE-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1 -; MINTREESIZE-NEXT: [[TMP11:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 -; MINTREESIZE-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP3]], i32 1 -; MINTREESIZE-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0 -; MINTREESIZE-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP2]], i32 1 -; MINTREESIZE-NEXT: [[TMP16:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0 -; MINTREESIZE-NEXT: [[TMP17:%.*]] = insertelement <2 x float> [[TMP16]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: ret <4 x float> [[TMP11]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -491,24 +479,12 @@ ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @take_credit( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B]], i32 2 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B]], i32 1 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[B]], i32 0 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[A]], i32 2 -; MINTREESIZE-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[A]], i32 1 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0 -; MINTREESIZE-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0 -; MINTREESIZE-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1 -; MINTREESIZE-NEXT: [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 -; MINTREESIZE-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP3]], i32 1 -; MINTREESIZE-NEXT: [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0 -; MINTREESIZE-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP2]], i32 1 -; MINTREESIZE-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0 -; MINTREESIZE-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP17:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP17]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -565,40 +541,16 @@ ; NOTHRESHOLD-NEXT: ret <8 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @_vadd256( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[B:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[B]], i32 6 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[B]], i32 5 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[B]], i32 4 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = extractelement <8 x float> [[B]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[B]], i32 2 -; MINTREESIZE-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[B]], i32 1 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[B]], i32 0 -; MINTREESIZE-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP10:%.*]] = extractelement <8 x float> [[A]], i32 6 -; MINTREESIZE-NEXT: [[TMP11:%.*]] = extractelement <8 x float> [[A]], i32 5 -; MINTREESIZE-NEXT: [[TMP12:%.*]] = extractelement <8 x float> [[A]], i32 4 -; MINTREESIZE-NEXT: [[TMP13:%.*]] = extractelement <8 x float> [[A]], i32 3 -; MINTREESIZE-NEXT: [[TMP14:%.*]] = extractelement <8 x float> [[A]], i32 2 -; MINTREESIZE-NEXT: [[TMP15:%.*]] = extractelement <8 x float> [[A]], i32 1 -; MINTREESIZE-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[A]], i32 0 -; MINTREESIZE-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP16]], i32 0 -; MINTREESIZE-NEXT: [[TMP18:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP8]], i32 1 -; MINTREESIZE-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i32 0 -; MINTREESIZE-NEXT: [[TMP20:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP7]], i32 1 -; MINTREESIZE-NEXT: [[TMP21:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i32 0 -; MINTREESIZE-NEXT: [[TMP22:%.*]] = insertelement <2 x float> [[TMP21]], float [[TMP6]], i32 1 -; MINTREESIZE-NEXT: [[TMP23:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0 -; MINTREESIZE-NEXT: [[TMP24:%.*]] = insertelement <2 x float> [[TMP23]], float [[TMP5]], i32 1 -; MINTREESIZE-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i32 0 -; MINTREESIZE-NEXT: [[TMP26:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP4]], i32 1 -; MINTREESIZE-NEXT: [[TMP27:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i32 0 -; MINTREESIZE-NEXT: [[TMP28:%.*]] = insertelement <2 x float> [[TMP27]], float [[TMP3]], i32 1 -; MINTREESIZE-NEXT: [[TMP29:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i32 0 -; MINTREESIZE-NEXT: [[TMP30:%.*]] = insertelement <2 x float> [[TMP29]], float [[TMP2]], i32 1 -; MINTREESIZE-NEXT: [[TMP31:%.*]] = insertelement <2 x float> poison, float [[TMP9]], i32 0 -; MINTREESIZE-NEXT: [[TMP32:%.*]] = insertelement <2 x float> [[TMP31]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP33:%.*]] = fadd <8 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <8 x float> [[TMP33]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP9:%.*]] = fadd <8 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <8 x float> [[TMP9]] ; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %b, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -476,24 +476,12 @@ ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @reschedule_extract( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B]], i32 2 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B]], i32 1 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[B]], i32 0 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[A]], i32 2 -; MINTREESIZE-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[A]], i32 1 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0 -; MINTREESIZE-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0 -; MINTREESIZE-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1 -; MINTREESIZE-NEXT: [[TMP11:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 -; MINTREESIZE-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP3]], i32 1 -; MINTREESIZE-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0 -; MINTREESIZE-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP2]], i32 1 -; MINTREESIZE-NEXT: [[TMP16:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0 -; MINTREESIZE-NEXT: [[TMP17:%.*]] = insertelement <2 x float> [[TMP16]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: ret <4 x float> [[TMP11]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -526,24 +514,12 @@ ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @take_credit( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B]], i32 2 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B]], i32 1 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[B]], i32 0 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[A]], i32 2 -; MINTREESIZE-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[A]], i32 1 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0 -; MINTREESIZE-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0 -; MINTREESIZE-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1 -; MINTREESIZE-NEXT: [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 -; MINTREESIZE-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP3]], i32 1 -; MINTREESIZE-NEXT: [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0 -; MINTREESIZE-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP2]], i32 1 -; MINTREESIZE-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0 -; MINTREESIZE-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP17:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP17]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -600,40 +576,16 @@ ; NOTHRESHOLD-NEXT: ret <8 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @_vadd256( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[B:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[B]], i32 6 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[B]], i32 5 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[B]], i32 4 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = extractelement <8 x float> [[B]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[B]], i32 2 -; MINTREESIZE-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[B]], i32 1 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[B]], i32 0 -; MINTREESIZE-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP10:%.*]] = extractelement <8 x float> [[A]], i32 6 -; MINTREESIZE-NEXT: [[TMP11:%.*]] = extractelement <8 x float> [[A]], i32 5 -; MINTREESIZE-NEXT: [[TMP12:%.*]] = extractelement <8 x float> [[A]], i32 4 -; MINTREESIZE-NEXT: [[TMP13:%.*]] = extractelement <8 x float> [[A]], i32 3 -; MINTREESIZE-NEXT: [[TMP14:%.*]] = extractelement <8 x float> [[A]], i32 2 -; MINTREESIZE-NEXT: [[TMP15:%.*]] = extractelement <8 x float> [[A]], i32 1 -; MINTREESIZE-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[A]], i32 0 -; MINTREESIZE-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP16]], i32 0 -; MINTREESIZE-NEXT: [[TMP18:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP8]], i32 1 -; MINTREESIZE-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i32 0 -; MINTREESIZE-NEXT: [[TMP20:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP7]], i32 1 -; MINTREESIZE-NEXT: [[TMP21:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i32 0 -; MINTREESIZE-NEXT: [[TMP22:%.*]] = insertelement <2 x float> [[TMP21]], float [[TMP6]], i32 1 -; MINTREESIZE-NEXT: [[TMP23:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0 -; MINTREESIZE-NEXT: [[TMP24:%.*]] = insertelement <2 x float> [[TMP23]], float [[TMP5]], i32 1 -; MINTREESIZE-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i32 0 -; MINTREESIZE-NEXT: [[TMP26:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP4]], i32 1 -; MINTREESIZE-NEXT: [[TMP27:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i32 0 -; MINTREESIZE-NEXT: [[TMP28:%.*]] = insertelement <2 x float> [[TMP27]], float [[TMP3]], i32 1 -; MINTREESIZE-NEXT: [[TMP29:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i32 0 -; MINTREESIZE-NEXT: [[TMP30:%.*]] = insertelement <2 x float> [[TMP29]], float [[TMP2]], i32 1 -; MINTREESIZE-NEXT: [[TMP31:%.*]] = insertelement <2 x float> poison, float [[TMP9]], i32 0 -; MINTREESIZE-NEXT: [[TMP32:%.*]] = insertelement <2 x float> [[TMP31]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP33:%.*]] = fadd <8 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <8 x float> [[TMP33]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP9:%.*]] = fadd <8 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <8 x float> [[TMP9]] ; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %b, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll @@ -20,16 +20,10 @@ ; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> poison, [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], poison ; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[TMP8]], poison -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 -; CHECK-NEXT: [[VEC1:%.*]] = insertelement <2 x float> undef, float [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP9]], i32 1 -; CHECK-NEXT: [[VEC2:%.*]] = insertelement <2 x float> [[VEC1]], float [[TMP11]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i32 2 -; CHECK-NEXT: [[VEC3:%.*]] = insertelement <2 x float> undef, float [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP9]], i32 3 -; CHECK-NEXT: [[VEC4:%.*]] = insertelement <2 x float> [[VEC3]], float [[TMP13]], i32 1 -; CHECK-NEXT: [[INS1:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VEC2]], 0 -; CHECK-NEXT: [[INS2:%.*]] = insertvalue { <2 x float>, <2 x float> } [[INS1]], <2 x float> [[VEC4]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[INS1:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP10]], 0 +; CHECK-NEXT: [[INS2:%.*]] = insertvalue { <2 x float>, <2 x float> } [[INS1]], <2 x float> [[TMP11]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[INS2]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll @@ -9,10 +9,9 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> , i32 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP0]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 ; CHECK-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll @@ -69,22 +69,15 @@ ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP7]], i32 2 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP9]], i32 3 -; CHECK-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[TMP2]], [[TMP10]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP2]], [[TMP3]] ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0 ; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1 ; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2 ; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP12]], align 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: ret i32 undef ; %in.addr = getelementptr inbounds i32, i32* %in, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll @@ -43,14 +43,13 @@ ; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* @a, align 4 ; CHECK-NEXT: [[CONV19:%.*]] = sitofp i32 [[TMP15]] to float ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> , float [[CONV19]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[SHUFFLE]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP16]], float [[TMP17]], i32 2 -; CHECK-NEXT: [[TMP19:%.*]] = fsub <4 x float> [[TMP10]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = fadd <4 x float> [[TMP10]], [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> [[TMP20]], <4 x i32> -; CHECK-NEXT: [[TMP22:%.*]] = fptosi <4 x float> [[TMP21]] to <4 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x float> [[TMP16]], <4 x float> [[SHUFFLE]], <4 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = fsub <4 x float> [[TMP10]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = fadd <4 x float> [[TMP10]], [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> [[TMP19]], <4 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = fptosi <4 x float> [[TMP20]] to <4 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP21]], <4 x i32>* [[TMP22]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll @@ -60,9 +60,8 @@ ; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> poison, float [[X0]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[I21:%.*]] = shufflevector <4 x float> [[I0]], <4 x float> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I21]], float [[TMP4]], i32 3 -; CHECK-NEXT: ret <4 x float> [[I3]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[I21]], <4 x float> [[TMP3]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP4]] ; %gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0 %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll @@ -60,9 +60,8 @@ ; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X0]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[I21:%.*]] = shufflevector <4 x float> [[I0]], <4 x float> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I21]], float [[TMP4]], i32 3 -; CHECK-NEXT: ret <4 x float> [[I3]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[I21]], <4 x float> [[TMP3]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP4]] ; %gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0 %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll @@ -29,10 +29,9 @@ ; CHECK: if.then: ; CHECK-NEXT: [[AND0_TMP:%.*]] = and i64 [[TMP8]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[AND0_TMP]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP10]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i64> [[TMP11]], [[TMP7]] -; CHECK-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP9]], <2 x i64> [[TMP6]], <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = and <2 x i64> [[TMP10]], [[TMP7]] +; CHECK-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll @@ -151,7 +151,7 @@ ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[R_052:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[TMP2]], [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[TMP2]], [[ENTRY]] ], [ [[TMP16:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x float> [ [[TMP5]], [[ENTRY]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0 ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP8]], 7.000000e+00 @@ -163,25 +163,22 @@ ; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[ARRAYIDX19]] to <2 x float>* ; CHECK-NEXT: [[TMP12]] = load <2 x float>, <2 x float>* [[TMP11]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> poison, float [[TMP13]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP10]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x float> [[TMP15]], <4 x float> [[TMP16]], <4 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = fmul <4 x float> [[TMP17]], -; CHECK-NEXT: [[TMP19]] = fadd <4 x float> [[TMP6]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP20]], 121 +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> [[TMP12]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = fmul <4 x float> [[TMP14]], +; CHECK-NEXT: [[TMP16]] = fadd <4 x float> [[TMP6]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP17]], 121 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP19]], i32 0 -; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP19]], i32 1 -; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP22]] -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[TMP19]], i32 2 -; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP23]] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x float> [[TMP19]], i32 3 -; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP24]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP16]], i32 0 +; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[TMP16]], i32 1 +; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP16]], i32 2 +; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP16]], i32 3 +; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP21]] ; CHECK-NEXT: ret float [[ADD31]] ; entry: @@ -244,27 +241,20 @@ ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP5]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP9]] = fmul <4 x float> [[TMP8]], +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP2:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2]] = fmul <4 x float> [[TMP1]], ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], 128 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP9]], i32 1 -; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[TMP10]], [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i32 2 -; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP9]], i32 3 -; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP13]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 +; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 +; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 +; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP6]] ; CHECK-NEXT: ret float [[ADD31]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll @@ -13,16 +13,10 @@ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0 -; CHECK-NEXT: [[VECIN0:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1 -; CHECK-NEXT: [[VECIN1:%.*]] = insertelement <2 x float> [[VECIN0]], float [[TMP5]], i64 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP3]], i32 2 -; CHECK-NEXT: [[VECIN2:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP3]], i32 3 -; CHECK-NEXT: [[VECIN3:%.*]] = insertelement <2 x float> [[VECIN2]], float [[TMP7]], i64 1 -; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VECIN1]], 0 -; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[VECIN3]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP4]], 0 +; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP5]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[RET1]] ; %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll @@ -13,16 +13,10 @@ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0 -; CHECK-NEXT: [[VECIN0:%.*]] = insertelement <2 x float> undef, float [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1 -; CHECK-NEXT: [[VECIN1:%.*]] = insertelement <2 x float> [[VECIN0]], float [[TMP5]], i64 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP3]], i32 2 -; CHECK-NEXT: [[VECIN2:%.*]] = insertelement <2 x float> undef, float [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP3]], i32 3 -; CHECK-NEXT: [[VECIN3:%.*]] = insertelement <2 x float> [[VECIN2]], float [[TMP7]], i64 1 -; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VECIN1]], 0 -; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[VECIN3]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP4]], 0 +; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP5]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[RET1]] ; %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll @@ -16,12 +16,10 @@ ; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 5 ; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 6 ; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 7 -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i16> [[LD]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[P0]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* [[TMP3]], align 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP0]], <8 x i16>* [[TMP1]], align 2 ; CHECK-NEXT: ret void ; ; YAML: Pass: slp-vectorizer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll @@ -14,26 +14,19 @@ ; CHECK-NEXT: [[TMP8:%.*]] = sub nsw <4 x i32> zeroinitializer, [[SHUFFLE]] ; CHECK-NEXT: [[TMP9:%.*]] = shl nsw <4 x i32> [[TMP8]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP10]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> poison, i32 [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP13]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP10]], i32 3 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP15]], i32 2 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[TMP10]], i32 2 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP17]], i32 3 -; CHECK-NEXT: [[TMP19:%.*]] = add nsw <4 x i32> [[TMP10]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = sub nsw <4 x i32> [[TMP10]], [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP19]], <4 x i32> [[TMP20]], <4 x i32> -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 1, i64 0 -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 1, i64 2 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 1, i64 1 -; CHECK-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP21]] -; CHECK-NEXT: [[TMP26:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP21]] -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i32> [[TMP25]], <4 x i32> [[TMP26]], <4 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 1, i64 3 -; CHECK-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP22]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP27]], <4 x i32>* [[TMP29]], align 16 +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = sub nsw <4 x i32> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 1, i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 1, i64 2 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 1, i64 1 +; CHECK-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP14]] +; CHECK-NEXT: [[TMP19:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP14]] +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i32> [[TMP18]], <4 x i32> [[TMP19]], <4 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 1, i64 3 +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 16 ; CHECK-NEXT: ret void ; %1 = getelementptr inbounds i8, i8* undef, i64 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll @@ -50,14 +50,9 @@ ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP10]], i32 0 -; CHECK-NEXT: [[T69:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP12]], i32 4 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP10]], i32 1 -; CHECK-NEXT: [[T70:%.*]] = insertelement <8 x i32> [[T69]], i32 [[TMP13]], i32 5 -; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T70]], i32 [[T34]], i32 6 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP10]], i32 3 -; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[TMP14]], i32 7 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP11]], <8 x i32> [[TMP11]], <8 x i32> +; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP12]], i32 [[T34]], i32 6 +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], ; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>* ; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll @@ -50,14 +50,9 @@ ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP10]], i32 0 -; CHECK-NEXT: [[T69:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP12]], i32 4 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP10]], i32 1 -; CHECK-NEXT: [[T70:%.*]] = insertelement <8 x i32> [[T69]], i32 [[TMP13]], i32 5 -; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T70]], i32 [[T34]], i32 6 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP10]], i32 3 -; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[TMP14]], i32 7 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP11]], <8 x i32> [[TMP11]], <8 x i32> +; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP12]], i32 [[T34]], i32 6 +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], ; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>* ; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll @@ -12,7 +12,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1 ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP18:%.*]], [[BB3:%.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP14:%.*]], [[BB3:%.*]] ] ; CHECK-NEXT: [[TMP3:%.*]] = load double, double* undef, align 8 ; CHECK-NEXT: br i1 undef, label [[BB3]], label [[BB4:%.*]] ; CHECK: bb4: @@ -23,17 +23,13 @@ ; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x double> poison, double [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x double> [[TMP11]], double [[TMP12]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = fcmp ogt <4 x double> [[TMP13]], [[TMP4]] -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = fptrunc <4 x double> [[TMP15]] to <4 x float> -; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP14]], <4 x float> [[TMP2]], <4 x float> [[TMP16]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <4 x double> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float> +; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[TMP2]], <4 x float> [[TMP12]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP18]] = phi <4 x float> [ [[TMP17]], [[BB4]] ], [ [[TMP2]], [[BB2]] ] +; CHECK-NEXT: [[TMP14]] = phi <4 x float> [ [[TMP13]], [[BB4]] ], [ [[TMP2]], [[BB2]] ] ; CHECK-NEXT: br label [[BB2]] ; entry: