diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -274,6 +274,25 @@ return false; } +/// Checks if the given value is actually an undefined constant vector. +static bool isUndefVector(const Value *V) { + auto *C = dyn_cast(V); + if (!C) + return false; + if (!C->containsUndefOrPoisonElement()) + return false; + if (isa(C)) + return true; + for (unsigned I = 0, + E = cast(C->getType())->getNumElements(); + I != E; ++I) { + if (Constant *Elem = C->getAggregateElement(I)) + if (!isa(Elem)) + return false; + } + return true; +} + /// Checks if the vector of instructions can be represented as a shuffle, like: /// %x0 = extractelement <4 x i8> %x, i32 0 /// %x3 = extractelement <4 x i8> %x, i32 3 @@ -293,32 +312,16 @@ /// i32 6> /// %2 = mul <4 x i8> %1, %1 /// ret <4 x i8> %2 -/// We convert this initially to something like: -/// %x0 = extractelement <4 x i8> %x, i32 0 -/// %x3 = extractelement <4 x i8> %x, i32 3 -/// %y1 = extractelement <4 x i8> %y, i32 1 -/// %y2 = extractelement <4 x i8> %y, i32 2 -/// %1 = insertelement <4 x i8> poison, i8 %x0, i32 0 -/// %2 = insertelement <4 x i8> %1, i8 %x3, i32 1 -/// %3 = insertelement <4 x i8> %2, i8 %y1, i32 2 -/// %4 = insertelement <4 x i8> %3, i8 %y2, i32 3 -/// %5 = mul <4 x i8> %4, %4 -/// %6 = extractelement <4 x i8> %5, i32 0 -/// %ins1 = insertelement <4 x i8> poison, i8 %6, i32 0 -/// %7 = extractelement <4 x i8> %5, i32 1 -/// %ins2 = insertelement <4 x i8> %ins1, i8 %7, i32 1 -/// %8 = extractelement <4 x i8> %5, i32 2 -/// %ins3 = insertelement <4 x i8> %ins2, i8 %8, i32 2 -/// %9 = extractelement <4 x i8> %5, i32 3 -/// %ins4 = insertelement <4 x i8> %ins3, i8 %9, i32 3 -/// ret <4 x i8> %ins4 -/// InstCombiner transforms this into a shuffle and vector mul /// Mask will return the Shuffle Mask equivalent to the extracted elements. /// TODO: Can we split off and reuse the shuffle mask detection from /// TargetTransformInfo::getInstructionThroughput? static Optional isFixedVectorShuffle(ArrayRef VL, SmallVectorImpl &Mask) { - auto *EI0 = cast(VL[0]); + const auto *It = + llvm::find_if(VL, [](Value *V) { return isa(V); }); + if (It == VL.end()) + return None; + auto *EI0 = cast(*It); if (isa(EI0->getVectorOperandType())) return None; unsigned Size = @@ -328,6 +331,10 @@ enum ShuffleMode { Unknown, Select, Permute }; ShuffleMode CommonShuffleMode = Unknown; for (unsigned I = 0, E = VL.size(); I < E; ++I) { + if (isa(VL[I])) { + Mask.push_back(UndefMaskElem); + continue; + } auto *EI = cast(VL[I]); auto *Vec = EI->getVectorOperand(); // All vector operands must have the same number of vector elements. @@ -341,19 +348,23 @@ Mask.push_back(UndefMaskElem); continue; } - unsigned IntIdx = Idx->getValue().getZExtValue(); - Mask.push_back(IntIdx); // We can extractelement from undef or poison vector. - if (isa(Vec)) + if (isUndefVector(Vec)) { + Mask.push_back(UndefMaskElem); continue; + } + unsigned IntIdx = Idx->getValue().getZExtValue(); + Mask.push_back(IntIdx); // For correct shuffling we have to have at most 2 different vector operands // in all extractelement instructions. - if (!Vec1 || Vec1 == Vec) + if (!Vec1 || Vec1 == Vec) { Vec1 = Vec; - else if (!Vec2 || Vec2 == Vec) + } else if (!Vec2 || Vec2 == Vec) { Vec2 = Vec; - else + Mask.back() += Size; + } else { return None; + } if (CommonShuffleMode == Permute) continue; // If the extract index is not the same as the operation number, it is a @@ -1977,7 +1988,7 @@ /// Maps a specific scalar to its tree entry. SmallDenseMap ScalarToTreeEntry; - /// Maps a value to the proposed vectorizable size. + /// Maps a value to the proposed vectorizable size. SmallDenseMap InstrElementSize; /// A list of scalars that we found that we need to keep as scalars. @@ -3736,13 +3747,33 @@ ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n"); TE->setOperandsInOrder(); - for (unsigned i = 0, e = 2; i < e; ++i) { + for (unsigned I = 0, E = 2; I < E; ++I) { ValueList Operands; // Prepare the operand vector. - for (Value *V : VL) - Operands.push_back(cast(V)->getOperand(i)); + if (I >= 1) { + // Need to cast all elements to the same type before vectorization to + // avoid crash. + Type *VL0Ty = VL0->getOperand(I)->getType(); + Type *Ty = + all_of(VL, [VL0Ty](Value *V) { return VL0Ty == V->getType(); }) + ? VL0Ty + : DL->getIndexType(cast(VL0) + ->getPointerOperandType() + ->getScalarType()); + // Prepare the operand vector. + for (Value *V : VL) { + auto *Op = cast(V)->getOperand(I); + auto *CI = cast(Op); + Operands.push_back(ConstantExpr::getIntegerCast( + CI, Ty, CI->getValue().isSignBitSet())); + } + } else { + // Prepare the operand vector. + for (Value *V : VL) + Operands.push_back(cast(V)->getOperand(I)); + } - buildTree_rec(Operands, Depth + 1, {TE, i}); + buildTree_rec(Operands, Depth + 1, {TE, I}); } return; } @@ -4314,10 +4345,7 @@ // broadcast. return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy); } - if (E->getOpcode() == Instruction::ExtractElement && allSameType(VL) && - allSameBlock(VL) && - !isa( - cast(E->getMainOp())->getVectorOperandType())) { + if (E->getOpcode() == Instruction::ExtractElement && allSameType(VL)) { // Check that gather of extractelements can be represented as just a // shuffle of a single/two vectors the scalars are extracted from. SmallVector Mask; @@ -4558,7 +4586,8 @@ return !is_contained(E->Scalars, cast(V)->getOperand(0)); })); - if (isa(FirstInsert->getOperand(0))) { + + if (isUndefVector(FirstInsert->getOperand(0))) { Cost += TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, SrcVecTy, Mask); } else { SmallVector InsertMask(NumElts); @@ -5327,6 +5356,13 @@ } } + // Return if just building a vector of undefs. + if (UsedTEs.empty()) { + assert(all_of(TE->Scalars, UndefValue::classof) && + "Expected vector of undefs only."); + return None; + } + unsigned VF = 0; if (UsedTEs.size() == 1) { // Try to find the perfect match in another gather node at first. @@ -5674,59 +5710,12 @@ } } - // Check that every instruction appears once in this bundle. - SmallVector ReuseShuffleIndicies; - SmallVector UniqueValues; - if (VL.size() > 2) { - DenseMap UniquePositions; - unsigned NumValues = - std::distance(VL.begin(), find_if(reverse(VL), [](Value *V) { - return !isa(V); - }).base()); - VF = std::max(VF, PowerOf2Ceil(NumValues)); - int UniqueVals = 0; - for (Value *V : VL.drop_back(VL.size() - VF)) { - if (isa(V)) { - ReuseShuffleIndicies.emplace_back(UndefMaskElem); - continue; - } - if (isConstant(V)) { - ReuseShuffleIndicies.emplace_back(UniqueValues.size()); - UniqueValues.emplace_back(V); - continue; - } - auto Res = UniquePositions.try_emplace(V, UniqueValues.size()); - ReuseShuffleIndicies.emplace_back(Res.first->second); - if (Res.second) { - UniqueValues.emplace_back(V); - ++UniqueVals; - } - } - if (UniqueVals == 1 && UniqueValues.size() == 1) { - // Emit pure splat vector. - ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(), - UndefMaskElem); - } else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) { - ReuseShuffleIndicies.clear(); - UniqueValues.clear(); - UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues)); - } - UniqueValues.append(VF - UniqueValues.size(), - PoisonValue::get(VL[0]->getType())); - VL = UniqueValues; - } - - ShuffleInstructionBuilder ShuffleBuilder(Builder, VF); - Value *Vec = gather(VL); - if (!ReuseShuffleIndicies.empty()) { - ShuffleBuilder.addMask(ReuseShuffleIndicies); - Vec = ShuffleBuilder.finalize(Vec); - if (auto *I = dyn_cast(Vec)) { - GatherSeq.insert(I); - CSEBlocks.insert(I->getParent()); - } - } - return Vec; + auto *I = + find_if(VectorizableTree, [VL](const std::unique_ptr &TE) { + return TE->State == TreeEntry::NeedToGather && TE->isSame(VL); + }); + assert(I != VectorizableTree.end() && "Gather node is not in the graph."); + return vectorizeTree(I->get()); } Value *BoUpSLP::vectorizeTree(TreeEntry *E) { @@ -5743,22 +5732,137 @@ VF = E->ReuseShuffleIndices.size(); ShuffleInstructionBuilder ShuffleBuilder(Builder, VF); if (E->State == TreeEntry::NeedToGather) { - setInsertPointAfterBundle(E); - Value *Vec; + // Can set insert point safely on for the initial gather node. + if (E == VectorizableTree.front().get() && E->getMainOp()) + setInsertPointAfterBundle(E); + SmallVector ReuseShuffleIndicies(E->ReuseShuffleIndices.begin(), + E->ReuseShuffleIndices.end()); + Value *Vec = nullptr; + SmallVector GatheredScalars(E->Scalars.begin(), E->Scalars.end()); SmallVector Mask; - SmallVector Entries; - Optional Shuffle = - isGatherShuffledEntry(E, Mask, Entries); - if (Shuffle.hasValue()) { - assert((Entries.size() == 1 || Entries.size() == 2) && - "Expected shuffle of 1 or 2 entries."); - Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue, - Entries.back()->VectorizedValue, Mask); + if (!all_of(GatheredScalars, UndefValue::classof)) { + Type *ScalarTy = GatheredScalars.front()->getType(); + SmallVector Entries; + Optional Shuffle = + isGatherShuffledEntry(E, Mask, Entries); + if (Shuffle.hasValue()) { + assert((Entries.size() == 1 || Entries.size() == 2) && + "Expected shuffle of 1 or 2 entries."); + // Remove shuffled elements from list of gathers. + for (int I = 0, Sz = Mask.size(); I < Sz; ++I) { + if (Mask[I] != UndefMaskElem) + GatheredScalars[I] = PoisonValue::get(ScalarTy); + } + if (Entries.size() == 1) { + int Limit = Mask.size() * 2; + if (cast(Entries.front()->VectorizedValue->getType()) + ->getNumElements() != Mask.size() || + (all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) && + !ShuffleVectorInst::isIdentityMask(Mask))) { + Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue, + Mask); + } else { + Vec = Entries.front()->VectorizedValue; + } + } else { + Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue, + Entries.back()->VectorizedValue, + Mask); + } + } else { + // Check that every instruction appears once in this bundle. + SmallVector UniqueValues; + if (GatheredScalars.size() > 2 && !NeedToShuffleReuses) { + DenseMap UniquePositions; + int UniqueVals = 0; + for (Value *V : GatheredScalars) { + if (isa(V) && !isa(V)) { + ReuseShuffleIndicies.emplace_back(UndefMaskElem); + continue; + } + if (isConstant(V)) { + ReuseShuffleIndicies.emplace_back(UniqueValues.size()); + UniqueValues.emplace_back(V); + continue; + } + auto Res = UniquePositions.try_emplace(V, UniqueValues.size()); + ReuseShuffleIndicies.emplace_back(Res.first->second); + if (Res.second) { + UniqueValues.emplace_back(V); + ++UniqueVals; + } + } + if (UniqueVals == 1 && UniqueValues.size() == 1) { + // Emit pure splat vector. + ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(), + UndefMaskElem); + } else if (UniqueValues.size() >= VF - 1 || + UniqueValues.size() <= 1) { + ReuseShuffleIndicies.clear(); + UniqueValues.swap(GatheredScalars); + } + UniqueValues.append(VF - UniqueValues.size(), + PoisonValue::get(ScalarTy)); + GatheredScalars.swap(UniqueValues); + } + } + } + + Value *VecVal = nullptr; + if (E->getOpcode() == Instruction::ExtractElement && + allSameType(GatheredScalars)) { + // Check that gather of extractelements can be represented as just a + // shuffle of a single/two vectors the scalars are extracted from. + SmallVector Mask; + Optional ShuffleKind = + isFixedVectorShuffle(GatheredScalars, Mask); + if (ShuffleKind.hasValue()) { + // Find input vectors. + Value *Vec1 = nullptr; + Value *Vec2 = nullptr; + for (int I = 0, Sz = Mask.size(); I < Sz; ++I) { + if (Mask[I] == UndefMaskElem) + continue; + auto *EI = cast(GatheredScalars[I]); + if (!Vec1) { + Vec1 = EI->getVectorOperand(); + } else if (Vec1 != EI->getVectorOperand()) { + assert((!Vec2 || Vec2 == EI->getVectorOperand()) && + "Expected only 1 or 2 vectors shuffle."); + Vec2 = EI->getVectorOperand(); + } + } + if (Vec2) { + VecVal = Builder.CreateShuffleVector(Vec1, Vec2, Mask); + } else if (!Vec1) { + // If extracts are all from undef vectors - just build poison vector. + VecVal = PoisonValue::get(FixedVectorType::get( + GatheredScalars.front()->getType(), GatheredScalars.size())); + } else if (GatheredScalars.size() != + cast(Vec1->getType()) + ->getNumElements() || + !ShuffleVectorInst::isIdentityMask(Mask)) { + VecVal = Builder.CreateShuffleVector(Vec1, Mask); + } + } + } + if (!VecVal) + VecVal = gather(GatheredScalars); + if (Vec) { + if (!all_of(GatheredScalars, UndefValue::classof)) { + for (int I = 0, Sz = GatheredScalars.size(); I < Sz; ++I) { + if (!isa(GatheredScalars[I])) + Mask[I] = I + Sz; + else + Mask[I] = I; + } + Vec = Builder.CreateShuffleVector(Vec, VecVal, Mask); + } } else { - Vec = gather(E->Scalars); + Vec = VecVal; } - if (NeedToShuffleReuses) { - ShuffleBuilder.addMask(E->ReuseShuffleIndices); + if (!ReuseShuffleIndicies.empty()) { + ShuffleBuilder.addMask(ReuseShuffleIndicies); Vec = ShuffleBuilder.finalize(Vec); if (auto *I = dyn_cast(Vec)) { GatherSeq.insert(I); @@ -5884,7 +5988,7 @@ V = Builder.CreateShuffleVector(V, Mask); if ((!IsIdentity || Offset != 0 || - !isa(FirstInsert->getOperand(0))) && + !isUndefVector(FirstInsert->getOperand(0))) && NumElts != NumScalars) { SmallVector InsertMask(NumElts); std::iota(InsertMask.begin(), InsertMask.end(), 0); diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll @@ -23,14 +23,13 @@ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[VECEXT_1]], i32 0 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[VECEXT_2]], i32 1 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP2]]) +; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP4]], <4 x i32> ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP6:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]]) -; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP6]], i32 3 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]]) +; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP5]], i32 3 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] ; entry: @@ -219,14 +218,13 @@ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[VECEXT_1]], i32 0 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[VECEXT_2]], i32 1 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP2]]) +; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP4]], <4 x i32> ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP6:%.*]] = tail call fast float @expf(float [[VECEXT_3]]) -; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP6]], i32 3 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = tail call fast float @expf(float [[VECEXT_3]]) +; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP5]], i32 3 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] ; entry: @@ -302,14 +300,13 @@ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[VECEXT_1]], i32 0 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[VECEXT_2]], i32 1 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP2]]) +; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP4]], <4 x i32> ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP6:%.*]] = tail call fast float @logf(float [[VECEXT_3]]) -; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP6]], i32 3 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = tail call fast float @logf(float [[VECEXT_3]]) +; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP5]], i32 3 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] ; entry: @@ -478,14 +475,13 @@ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[VECEXT_1]], i32 0 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[VECEXT_2]], i32 1 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP2]]) +; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP4]], <4 x i32> ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP6:%.*]] = tail call fast float @sinf(float [[VECEXT_3]]) -; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP6]], i32 3 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = tail call fast float @sinf(float [[VECEXT_3]]) +; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP5]], i32 3 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] ; entry: @@ -520,14 +516,13 @@ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[VECEXT_1]], i32 0 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[VECEXT_2]], i32 1 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP2]]) +; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP4]], <4 x i32> ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP6:%.*]] = tail call fast float @cosf(float [[VECEXT_3]]) -; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP6]], i32 3 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = tail call fast float @cosf(float [[VECEXT_3]]) +; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP5]], i32 3 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] ; entry: @@ -1011,14 +1006,13 @@ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[VECEXT_1]], i32 0 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[VECEXT_2]], i32 1 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP2]]) +; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP4]], <4 x i32> ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP6:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]]) -; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP6]], i32 3 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]]) +; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP5]], i32 3 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll @@ -23,14 +23,13 @@ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[VECEXT_1]], i32 0 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[VECEXT_2]], i32 1 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP2]]) +; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP4]], <4 x i32> ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP6:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]]) -; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP6]], i32 3 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]]) +; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP5]], i32 3 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] ; entry: @@ -219,14 +218,13 @@ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[VECEXT_1]], i32 0 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[VECEXT_2]], i32 1 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP2]]) +; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP4]], <4 x i32> ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP6:%.*]] = tail call fast float @expf(float [[VECEXT_3]]) -; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP6]], i32 3 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = tail call fast float @expf(float [[VECEXT_3]]) +; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP5]], i32 3 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] ; entry: @@ -302,14 +300,13 @@ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[VECEXT_1]], i32 0 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[VECEXT_2]], i32 1 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP2]]) +; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP4]], <4 x i32> ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP6:%.*]] = tail call fast float @logf(float [[VECEXT_3]]) -; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP6]], i32 3 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = tail call fast float @logf(float [[VECEXT_3]]) +; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP5]], i32 3 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] ; entry: @@ -478,14 +475,13 @@ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[VECEXT_1]], i32 0 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[VECEXT_2]], i32 1 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP2]]) +; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP4]], <4 x i32> ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP6:%.*]] = tail call fast float @sinf(float [[VECEXT_3]]) -; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP6]], i32 3 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = tail call fast float @sinf(float [[VECEXT_3]]) +; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP5]], i32 3 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] ; entry: @@ -520,14 +516,13 @@ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[VECEXT_1]], i32 0 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[VECEXT_2]], i32 1 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP2]]) +; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP4]], <4 x i32> ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP6:%.*]] = tail call fast float @cosf(float [[VECEXT_3]]) -; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP6]], i32 3 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = tail call fast float @cosf(float [[VECEXT_3]]) +; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP5]], i32 3 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] ; entry: @@ -1011,14 +1006,13 @@ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[VECEXT_1]], i32 0 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[VECEXT_2]], i32 1 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP3:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP2]]) +; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_21:%.*]] = shufflevector <4 x float> [[VECINS]], <4 x float> [[TMP4]], <4 x i32> ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP6:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]]) -; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP6]], i32 3 +; NOACCELERATE-NEXT: [[TMP5:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]]) +; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_21]], float [[TMP5]], i32 3 ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll @@ -24,14 +24,14 @@ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX43]] to <4 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> poison, <4 x double> zeroinitializer, <4 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> undef, <4 x double> zeroinitializer, <4 x double> [[TMP2]]) ; CHECK-NEXT: br label [[SW_EPILOG:%.*]] ; CHECK: sw.bb195: ; CHECK-NEXT: br label [[SW_EPILOG]] ; CHECK: do.body: ; CHECK-NEXT: unreachable ; CHECK: sw.epilog: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x double> [ poison, [[SW_BB195]] ], [ [[TMP3]], [[SW_BB]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x double> [ undef, [[SW_BB195]] ], [ [[TMP3]], [[SW_BB]] ] ; CHECK-NEXT: ret i32 undef ; CHECK: if.end.1: ; CHECK-NEXT: br label [[FOR_COND15_1:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll @@ -149,7 +149,7 @@ ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_0]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> undef, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP5]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll @@ -149,7 +149,7 @@ ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_0]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> undef, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP5]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll @@ -15,14 +15,13 @@ ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, <4 x double>* [[PTR_2:%.*]], align 16 ; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 ; CHECK-NEXT: [[V2_LANE_3:%.*]] = extractelement <4 x double> [[V_2]], i32 3 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V2_LANE_3]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[V_1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[V_1]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> [[V_1]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[V_1]], i32 0 +; CHECK-NEXT: call void @use(double [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[V_1]], i32 1 ; CHECK-NEXT: call void @use(double [[TMP3]]) -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[V_1]], i32 1 -; CHECK-NEXT: call void @use(double [[TMP4]]) -; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* [[PTR_1]], align 8 +; CHECK-NEXT: store <2 x double> [[TMP1]], <2 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -57,14 +56,12 @@ ; CHECK-NEXT: [[V3_LANE_1:%.*]] = extractelement <2 x double> [[V_3]], i32 1 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, <4 x double>* [[PTR_2:%.*]], align 16 ; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_0]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V3_LANE_1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[V2_LANE_2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[V_1]], <2 x double> [[V_3]], <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]] ; CHECK-NEXT: call void @use(double [[V1_LANE_0]]) ; CHECK-NEXT: call void @use(double [[V3_LANE_1]]) -; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[PTR_1]], align 8 +; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -99,15 +96,13 @@ ; CHECK-NEXT: [[V1_LANE_3:%.*]] = extractelement <4 x double> [[V_1]], i32 3 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, <4 x double>* [[PTR_2:%.*]], align 16 ; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1_LANE_3]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[V2_LANE_2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[V_1]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_3]]) -; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[PTR_1]], align 8 +; CHECK-NEXT: store <4 x double> [[TMP3]], <4 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -138,15 +133,14 @@ ; CHECK-NEXT: [[V_1:%.*]] = load <2 x double>, <2 x double>* [[PTR_1:%.*]], align 8 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, <4 x double>* [[PTR_2:%.*]], align 16 ; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V2_LANE_2]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[V_1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[V_1]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> [[V_1]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[V_1]], i32 0 +; CHECK-NEXT: call void @use(double [[TMP3]]) +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[V_1]], i32 1 ; CHECK-NEXT: call void @use(double [[TMP4]]) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[V_1]], i32 1 -; CHECK-NEXT: call void @use(double [[TMP5]]) -; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[PTR_1]], align 8 +; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -179,15 +173,13 @@ ; CHECK-NEXT: [[V1_LANE_2:%.*]] = extractelement <4 x double> [[V_1]], i32 2 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, <4 x double>* [[PTR_2:%.*]], align 16 ; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_1]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1_LANE_2]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[V2_LANE_2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[V_1]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) -; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[PTR_1]], align 8 +; CHECK-NEXT: store <4 x double> [[TMP3]], <4 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -226,20 +218,16 @@ ; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0 ; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1 ; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> poison, double [[V1_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[V1_LANE_3]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[V1_LANE_0]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[V1_LANE_1]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[V2_LANE_0]], i32 1 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP3]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> poison, <9 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP0]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <9 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_0]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_3]]) -; CHECK-NEXT: store <9 x double> [[TMP7]], <9 x double>* [[PTR_1]], align 8 +; CHECK-NEXT: store <9 x double> [[TMP3]], <9 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -344,38 +332,20 @@ ; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0 ; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1 ; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_3]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x double> [[TMP0]], double [[V1_LANE_4]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x double> [[TMP1]], double [[V1_LANE_5]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x double> [[TMP2]], double [[V1_LANE_6]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x double> [[TMP3]], double [[V1_LANE_7]], i32 4 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x double> [[TMP4]], double [[V1_LANE_8]], i32 5 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x double> [[TMP5]], double [[V1_LANE_0]], i32 6 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x double> [[TMP6]], double [[V1_LANE_1]], i32 7 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_0]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[V2_LANE_2]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[V2_LANE_1]], i32 2 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[SHUFFLE1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP12]], double [[A_LANE_8]], i32 8 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_6]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V1_LANE_7]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V1_LANE_8]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x double> [[TMP15]], double [[V1_LANE_0]], i32 3 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x double> [[TMP16]], double [[V1_LANE_1]], i32 4 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_2]], i32 5 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_3]], i32 6 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_4]], i32 7 -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V2_LANE_1]], i32 1 -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V2_LANE_0]], i32 2 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul <8 x double> [[TMP4]], [[SHUFFLE]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP25]], double [[B_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP7]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], <9 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void @@ -461,34 +431,18 @@ ; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0 ; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1 ; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_4]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x double> [[TMP0]], double [[V1_LANE_3]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x double> [[TMP1]], double [[V1_LANE_6]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x double> [[TMP2]], double [[V1_LANE_5]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x double> [[TMP3]], double [[V1_LANE_8]], i32 4 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x double> [[TMP4]], double [[V1_LANE_7]], i32 5 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x double> [[TMP5]], double [[V1_LANE_1]], i32 6 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x double> [[TMP6]], double [[V1_LANE_0]], i32 7 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_1]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[V2_LANE_0]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[V2_LANE_2]], i32 2 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[SHUFFLE1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_1]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP12]], double [[A_LANE_8]], i32 8 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_6]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V1_LANE_7]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V1_LANE_8]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x double> [[TMP15]], double [[V1_LANE_0]], i32 3 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x double> [[TMP16]], double [[V1_LANE_1]], i32 4 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_2]], i32 5 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_3]], i32 6 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_4]], i32 7 -; CHECK-NEXT: [[TMP21:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fmul <8 x double> [[TMP4]], [[SHUFFLE1]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP22]], double [[B_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP6]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], <9 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void @@ -574,38 +528,20 @@ ; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0 ; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1 ; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_4]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x double> [[TMP0]], double [[V1_LANE_3]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x double> [[TMP1]], double [[V1_LANE_5]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x double> [[TMP2]], double [[V1_LANE_6]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x double> [[TMP3]], double [[V1_LANE_8]], i32 4 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x double> [[TMP4]], double [[V1_LANE_7]], i32 5 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x double> [[TMP5]], double [[V1_LANE_1]], i32 6 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x double> [[TMP6]], double [[V1_LANE_0]], i32 7 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_0]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[V2_LANE_2]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[V2_LANE_1]], i32 2 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[SHUFFLE1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP12]], double [[A_LANE_8]], i32 8 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_7]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V1_LANE_6]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V1_LANE_8]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x double> [[TMP15]], double [[V1_LANE_1]], i32 3 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x double> [[TMP16]], double [[V1_LANE_0]], i32 4 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_3]], i32 5 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_2]], i32 6 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_5]], i32 7 -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V2_LANE_1]], i32 1 -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V2_LANE_0]], i32 2 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul <8 x double> [[TMP4]], [[SHUFFLE]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_4]], [[V2_LANE_2]] -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP25]], double [[B_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP7]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], <9 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll @@ -244,12 +244,12 @@ ; GFX8-NEXT: bb: ; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2 ; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2 -; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> undef, <2 x i32> -; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> undef, <2 x i32> +; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) ; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]]) -; GFX8-NEXT: [[INS_11:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> -; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[INS_11]], i16 [[ADD_2]], i64 2 +; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> +; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2 ; GFX8-NEXT: ret <3 x i16> [[INS_2]] ; bb: @@ -291,14 +291,14 @@ ; ; GFX8-LABEL: @uadd_sat_v4i16( ; GFX8-NEXT: bb: -; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> undef, <2 x i32> -; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> undef, <2 x i32> +; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) -; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> undef, <2 x i32> -; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> undef, <2 x i32> +; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> ; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]]) -; GFX8-NEXT: [[INS_32:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> -; GFX8-NEXT: ret <4 x i16> [[INS_32]] +; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> +; GFX8-NEXT: ret <4 x i16> [[INS_31]] ; bb: %arg0.0 = extractelement <4 x i16> %arg0, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll @@ -244,12 +244,12 @@ ; GFX8-NEXT: bb: ; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2 ; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2 -; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> undef, <2 x i32> -; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> undef, <2 x i32> +; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) ; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]]) -; GFX8-NEXT: [[INS_11:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> -; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[INS_11]], i16 [[ADD_2]], i64 2 +; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> +; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2 ; GFX8-NEXT: ret <3 x i16> [[INS_2]] ; bb: @@ -291,14 +291,14 @@ ; ; GFX8-LABEL: @uadd_sat_v4i16( ; GFX8-NEXT: bb: -; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> undef, <2 x i32> -; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> undef, <2 x i32> +; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) -; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> undef, <2 x i32> -; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> undef, <2 x i32> +; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> ; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]]) -; GFX8-NEXT: [[INS_32:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> -; GFX8-NEXT: ret <4 x i16> [[INS_32]] +; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> +; GFX8-NEXT: ret <4 x i16> [[INS_31]] ; bb: %arg0.0 = extractelement <4 x i16> %arg0, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll @@ -8,12 +8,10 @@ ; CHECK-NEXT: [[ARG0_2:%.*]] = extractelement <9 x i16> [[ARG0:%.*]], i64 8 ; CHECK-NEXT: [[ARG1_1:%.*]] = extractelement <9 x i16> [[ARG1:%.*]], i64 7 ; CHECK-NEXT: [[ARG1_2:%.*]] = extractelement <9 x i16> [[ARG1]], i64 8 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> poison, i16 [[ARG0_1]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> [[TMP0]], i16 [[ARG0_2]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[ARG1_1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> [[TMP2]], i16 [[ARG1_2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP1]], <2 x i16> [[TMP3]]) -; CHECK-NEXT: ret <2 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x i16> [[ARG0]], <9 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <9 x i16> [[ARG1]], <9 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +; CHECK-NEXT: ret <2 x i16> [[TMP2]] ; bb: %arg0.1 = extractelement <9 x i16> undef, i64 7 diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll @@ -18,7 +18,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ADD277]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> poison, [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> undef, [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP6]], ; CHECK-NEXT: [[ARRAYIDX372_3:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 3 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX372]] to <4 x i32>* diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll @@ -6,12 +6,6 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x half> undef, i32 4 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x half> undef, i32 5 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> poison, <8 x i32> [[TMP6]], <8 x i32> ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll @@ -6,12 +6,6 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x half> undef, i32 4 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x half> undef, i32 5 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> undef, <8 x i32> [[TMP6]], <8 x i32> ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll @@ -7,7 +7,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP10:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP9:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[SHUFFLE]], @@ -40,11 +40,10 @@ ; CHECK-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]] ; CHECK-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> , i32 [[OP_EXTRA26]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = and <2 x i32> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP10]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[SHUFFLE]], <8 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = and <2 x i32> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP8]], <2 x i32> ; CHECK-NEXT: br label [[LOOP]] ; ; FORCE_REDUCTION-LABEL: @Test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll @@ -11,12 +11,12 @@ ; SSE-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0 ; SSE-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3 ; SSE-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SSE-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; SSE-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SSE-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SSE-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; SSE-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i32 0 ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> @@ -32,12 +32,12 @@ ; SLM-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0 ; SLM-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3 ; SLM-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; SLM-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i32 0 ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> @@ -53,12 +53,12 @@ ; AVX-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0 ; AVX-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3 ; AVX-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; AVX-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; AVX-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i32 0 ; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll @@ -11,12 +11,12 @@ ; SSE-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0 ; SSE-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3 ; SSE-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SSE-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; SSE-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SSE-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SSE-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; SSE-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i32 0 ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> @@ -32,12 +32,12 @@ ; SLM-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0 ; SLM-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3 ; SLM-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; SLM-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i32 0 ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> @@ -53,12 +53,12 @@ ; AVX-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0 ; AVX-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3 ; AVX-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) ; AVX-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> +; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; AVX-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i32 0 ; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll @@ -163,10 +163,10 @@ define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) { ; CHECK-LABEL: @sitofp_4i32_8i16( ; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> -; CHECK-NEXT: [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> -; CHECK-NEXT: ret <8 x float> [[R72]] +; CHECK-NEXT: [[R71:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[R71]] ; %a0 = extractelement <4 x i32> %a, i32 0 %a1 = extractelement <4 x i32> %a, i32 1 @@ -201,20 +201,20 @@ ; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> ; CHECK-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = sitofp <2 x i16> [[TMP4]] to <2 x float> ; CHECK-NEXT: [[TMP6:%.*]] = uitofp <2 x i16> [[TMP4]] to <2 x float> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP6]], <2 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> undef, <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = sitofp <2 x i8> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP10:%.*]] = uitofp <2 x i8> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x i32> -; CHECK-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> -; CHECK-NEXT: [[R53:%.*]] = shufflevector <8 x float> [[R31]], <8 x float> [[TMP12]], <8 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> -; CHECK-NEXT: [[R72:%.*]] = shufflevector <8 x float> [[R53]], <8 x float> [[TMP13]], <8 x i32> -; CHECK-NEXT: ret <8 x float> [[R72]] +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> +; CHECK-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> [[TMP13]], <8 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> +; CHECK-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP14]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[R71]] ; %a0 = extractelement <4 x i32> %a, i32 0 %a1 = extractelement <4 x i32> %a, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll @@ -163,10 +163,10 @@ define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) { ; CHECK-LABEL: @sitofp_4i32_8i16( ; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> -; CHECK-NEXT: [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> -; CHECK-NEXT: ret <8 x float> [[R72]] +; CHECK-NEXT: [[R71:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[R71]] ; %a0 = extractelement <4 x i32> %a, i32 0 %a1 = extractelement <4 x i32> %a, i32 1 @@ -201,20 +201,20 @@ ; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> ; CHECK-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = sitofp <2 x i16> [[TMP4]] to <2 x float> ; CHECK-NEXT: [[TMP6:%.*]] = uitofp <2 x i16> [[TMP4]] to <2 x float> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP6]], <2 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> undef, <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = sitofp <2 x i8> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP10:%.*]] = uitofp <2 x i8> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x i32> -; CHECK-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> -; CHECK-NEXT: [[R53:%.*]] = shufflevector <8 x float> [[R31]], <8 x float> [[TMP12]], <8 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> -; CHECK-NEXT: [[R72:%.*]] = shufflevector <8 x float> [[R53]], <8 x float> [[TMP13]], <8 x i32> -; CHECK-NEXT: ret <8 x float> [[R72]] +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> +; CHECK-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> [[TMP13]], <8 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> +; CHECK-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP14]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[R71]] ; %a0 = extractelement <4 x i32> %a, i32 0 %a1 = extractelement <4 x i32> %a, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll @@ -98,11 +98,11 @@ ; SLM-LABEL: @fmul_fdiv_v4f32_const( ; SLM-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 2 ; SLM-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3 -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], ; SLM-NEXT: [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00 -; SLM-NEXT: [[R11:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> -; SLM-NEXT: [[R2:%.*]] = insertelement <4 x float> [[R11]], float [[A2]], i32 2 +; SLM-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> +; SLM-NEXT: [[R2:%.*]] = insertelement <4 x float> [[TMP3]], float [[A2]], i32 2 ; SLM-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i32 3 ; SLM-NEXT: ret <4 x float> [[R3]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll @@ -98,11 +98,11 @@ ; SLM-LABEL: @fmul_fdiv_v4f32_const( ; SLM-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 2 ; SLM-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3 -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], ; SLM-NEXT: [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00 -; SLM-NEXT: [[R11:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> -; SLM-NEXT: [[R2:%.*]] = insertelement <4 x float> [[R11]], float [[A2]], i32 2 +; SLM-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> +; SLM-NEXT: [[R2:%.*]] = insertelement <4 x float> [[TMP3]], float [[A2]], i32 2 ; SLM-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i32 3 ; SLM-NEXT: ret <4 x float> [[R3]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll @@ -170,9 +170,9 @@ define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) { ; SSE-LABEL: @ashr_shl_v8i32_const( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], ; SSE-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[R71]] @@ -234,8 +234,8 @@ ; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7 ; SSE-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 6 ; SSE-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7 -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; SSE-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> @@ -251,13 +251,13 @@ ; SSE-NEXT: ret <8 x i32> [[R7]] ; ; SLM-LABEL: @ashr_lshr_shl_v8i32( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; SLM-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; SLM-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; SLM-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; SLM-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; SLM-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -265,13 +265,13 @@ ; SLM-NEXT: ret <8 x i32> [[R71]] ; ; AVX1-LABEL: @ashr_lshr_shl_v8i32( -; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> -; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> +; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> ; AVX1-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; AVX1-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; AVX1-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; AVX1-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; AVX1-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; AVX1-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; AVX1-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; AVX1-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; AVX1-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; AVX1-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -279,13 +279,13 @@ ; AVX1-NEXT: ret <8 x i32> [[R71]] ; ; AVX2-LABEL: @ashr_lshr_shl_v8i32( -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> ; AVX2-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; AVX2-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; AVX2-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; AVX2-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; AVX2-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -293,13 +293,13 @@ ; AVX2-NEXT: ret <8 x i32> [[R71]] ; ; AVX512-LABEL: @ashr_lshr_shl_v8i32( -; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> -; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> +; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> ; AVX512-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; AVX512-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; AVX512-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; AVX512-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; AVX512-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; AVX512-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; AVX512-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -441,10 +441,10 @@ ; AVX2-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 1 ; AVX2-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5 ; AVX2-NEXT: [[AB1:%.*]] = sdiv i32 [[A1]], 4 -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX2-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], ; AVX2-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4 -; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX2-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], ; AVX2-NEXT: [[R1:%.*]] = insertelement <8 x i32> poison, i32 [[AB1]], i32 1 ; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> @@ -458,10 +458,10 @@ ; AVX512-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 1 ; AVX512-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5 ; AVX512-NEXT: [[AB1:%.*]] = sdiv i32 [[A1]], 4 -; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> +; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX512-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], ; AVX512-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4 -; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> +; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX512-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], ; AVX512-NEXT: [[R1:%.*]] = insertelement <8 x i32> poison, i32 [[AB1]], i32 1 ; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll @@ -170,9 +170,9 @@ define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) { ; SSE-LABEL: @ashr_shl_v8i32_const( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], ; SSE-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[R71]] @@ -234,8 +234,8 @@ ; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7 ; SSE-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 6 ; SSE-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7 -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; SSE-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> @@ -251,13 +251,13 @@ ; SSE-NEXT: ret <8 x i32> [[R7]] ; ; SLM-LABEL: @ashr_lshr_shl_v8i32( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; SLM-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; SLM-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; SLM-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; SLM-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; SLM-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -265,13 +265,13 @@ ; SLM-NEXT: ret <8 x i32> [[R71]] ; ; AVX1-LABEL: @ashr_lshr_shl_v8i32( -; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> -; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> +; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> ; AVX1-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; AVX1-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; AVX1-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; AVX1-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; AVX1-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; AVX1-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; AVX1-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; AVX1-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; AVX1-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; AVX1-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -279,13 +279,13 @@ ; AVX1-NEXT: ret <8 x i32> [[R71]] ; ; AVX2-LABEL: @ashr_lshr_shl_v8i32( -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> ; AVX2-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; AVX2-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; AVX2-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; AVX2-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; AVX2-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -293,13 +293,13 @@ ; AVX2-NEXT: ret <8 x i32> [[R71]] ; ; AVX512-LABEL: @ashr_lshr_shl_v8i32( -; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> -; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> +; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> +; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> ; AVX512-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; AVX512-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; AVX512-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; AVX512-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; AVX512-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP6]], [[TMP7]] ; AVX512-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP6]], [[TMP7]] ; AVX512-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> @@ -441,10 +441,10 @@ ; AVX2-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 1 ; AVX2-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5 ; AVX2-NEXT: [[AB1:%.*]] = sdiv i32 [[A1]], 4 -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX2-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], ; AVX2-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4 -; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX2-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], ; AVX2-NEXT: [[R1:%.*]] = insertelement <8 x i32> , i32 [[AB1]], i32 1 ; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> @@ -458,10 +458,10 @@ ; AVX512-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 1 ; AVX512-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5 ; AVX512-NEXT: [[AB1:%.*]] = sdiv i32 [[A1]], 4 -; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> +; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX512-NEXT: [[TMP2:%.*]] = sdiv <2 x i32> [[TMP1]], ; AVX512-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4 -; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> +; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX512-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], ; AVX512-NEXT: [[R1:%.*]] = insertelement <8 x i32> , i32 [[AB1]], i32 1 ; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-inseltpoison.ll @@ -623,33 +623,25 @@ ; SLM-NEXT: [[B5:%.*]] = extractelement <8 x double> [[B]], i32 5 ; SLM-NEXT: [[B6:%.*]] = extractelement <8 x double> [[B]], i32 6 ; SLM-NEXT: [[B7:%.*]] = extractelement <8 x double> [[B]], i32 7 -; SLM-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0 -; SLM-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[A1]], i32 1 -; SLM-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0 -; SLM-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[B1]], i32 1 -; SLM-NEXT: [[TMP5:%.*]] = fdiv <2 x double> [[TMP2]], [[TMP4]] -; SLM-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[A2]], i32 0 -; SLM-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[A3]], i32 1 -; SLM-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[B2]], i32 0 -; SLM-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[B3]], i32 1 -; SLM-NEXT: [[TMP10:%.*]] = fdiv <2 x double> [[TMP7]], [[TMP9]] -; SLM-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[A4]], i32 0 -; SLM-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[A5]], i32 1 -; SLM-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[B4]], i32 0 -; SLM-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double [[B5]], i32 1 -; SLM-NEXT: [[TMP15:%.*]] = fdiv <2 x double> [[TMP12]], [[TMP14]] -; SLM-NEXT: [[TMP16:%.*]] = insertelement <2 x double> poison, double [[A6]], i32 0 -; SLM-NEXT: [[TMP17:%.*]] = insertelement <2 x double> [[TMP16]], double [[A7]], i32 1 -; SLM-NEXT: [[TMP18:%.*]] = insertelement <2 x double> poison, double [[B6]], i32 0 -; SLM-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[TMP18]], double [[B7]], i32 1 -; SLM-NEXT: [[TMP20:%.*]] = fdiv <2 x double> [[TMP17]], [[TMP19]] -; SLM-NEXT: [[TMP21:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP10]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[R31:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> [[TMP22]], <8 x i32> -; SLM-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x double> [[R31]], <8 x double> [[TMP23]], <8 x i32> -; SLM-NEXT: [[TMP24:%.*]] = shufflevector <2 x double> [[TMP20]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[R73:%.*]] = shufflevector <8 x double> [[R52]], <8 x double> [[TMP24]], <8 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fdiv <2 x double> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP6:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP5]] +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP9:%.*]] = fdiv <2 x double> [[TMP7]], [[TMP8]] +; SLM-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP11:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP12:%.*]] = fdiv <2 x double> [[TMP10]], [[TMP11]] +; SLM-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[R31:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <8 x i32> +; SLM-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x double> [[R31]], <8 x double> [[TMP15]], <8 x i32> +; SLM-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[R73:%.*]] = shufflevector <8 x double> [[R52]], <8 x double> [[TMP16]], <8 x i32> ; SLM-NEXT: ret <8 x double> [[R73]] ; ; AVX-LABEL: @buildvector_div_8f64( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll @@ -623,33 +623,25 @@ ; SLM-NEXT: [[B5:%.*]] = extractelement <8 x double> [[B]], i32 5 ; SLM-NEXT: [[B6:%.*]] = extractelement <8 x double> [[B]], i32 6 ; SLM-NEXT: [[B7:%.*]] = extractelement <8 x double> [[B]], i32 7 -; SLM-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0 -; SLM-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[A1]], i32 1 -; SLM-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0 -; SLM-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[B1]], i32 1 -; SLM-NEXT: [[TMP5:%.*]] = fdiv <2 x double> [[TMP2]], [[TMP4]] -; SLM-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[A2]], i32 0 -; SLM-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[A3]], i32 1 -; SLM-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[B2]], i32 0 -; SLM-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[B3]], i32 1 -; SLM-NEXT: [[TMP10:%.*]] = fdiv <2 x double> [[TMP7]], [[TMP9]] -; SLM-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[A4]], i32 0 -; SLM-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[A5]], i32 1 -; SLM-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[B4]], i32 0 -; SLM-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double [[B5]], i32 1 -; SLM-NEXT: [[TMP15:%.*]] = fdiv <2 x double> [[TMP12]], [[TMP14]] -; SLM-NEXT: [[TMP16:%.*]] = insertelement <2 x double> poison, double [[A6]], i32 0 -; SLM-NEXT: [[TMP17:%.*]] = insertelement <2 x double> [[TMP16]], double [[A7]], i32 1 -; SLM-NEXT: [[TMP18:%.*]] = insertelement <2 x double> poison, double [[B6]], i32 0 -; SLM-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[TMP18]], double [[B7]], i32 1 -; SLM-NEXT: [[TMP20:%.*]] = fdiv <2 x double> [[TMP17]], [[TMP19]] -; SLM-NEXT: [[TMP21:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP10]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[R31:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> [[TMP22]], <8 x i32> -; SLM-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x double> [[R31]], <8 x double> [[TMP23]], <8 x i32> -; SLM-NEXT: [[TMP24:%.*]] = shufflevector <2 x double> [[TMP20]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[R73:%.*]] = shufflevector <8 x double> [[R52]], <8 x double> [[TMP24]], <8 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fdiv <2 x double> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP6:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP5]] +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP9:%.*]] = fdiv <2 x double> [[TMP7]], [[TMP8]] +; SLM-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP11:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP12:%.*]] = fdiv <2 x double> [[TMP10]], [[TMP11]] +; SLM-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[R31:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <8 x i32> +; SLM-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x double> [[R31]], <8 x double> [[TMP15]], <8 x i32> +; SLM-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[R73:%.*]] = shufflevector <8 x double> [[R52]], <8 x double> [[TMP16]], <8 x i32> ; SLM-NEXT: ret <8 x double> [[R73]] ; ; AVX-LABEL: @buildvector_div_8f64( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute-inseltpoison.ll @@ -245,7 +245,7 @@ ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[B3:%.*]] = load float, float* [[P3]], align 4 ; CHECK-NEXT: [[C0:%.*]] = fcmp ord float [[A0]], [[B0]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <2 x float> [[TMP2]], [[TMP3]] ; CHECK-NEXT: [[C3:%.*]] = fcmp ord float [[A3]], [[B3]] ; CHECK-NEXT: [[D0:%.*]] = insertelement <4 x i1> poison, i1 [[C0]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute.ll @@ -245,7 +245,7 @@ ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 ; CHECK-NEXT: [[B3:%.*]] = load float, float* [[P3]], align 4 ; CHECK-NEXT: [[C0:%.*]] = fcmp ord float [[A0]], [[B0]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <2 x float> [[TMP2]], [[TMP3]] ; CHECK-NEXT: [[C3:%.*]] = fcmp ord float [[A3]], [[B3]] ; CHECK-NEXT: [[D0:%.*]] = insertelement <4 x i1> undef, i1 [[C0]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll @@ -29,13 +29,13 @@ ; CHECK-NEXT: [[IXX22:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 ; CHECK-NEXT: [[IX2:%.*]] = fmul double [[TMP8]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> , double [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> , double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[TMP13]], [[TMP14]] ; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [ ; CHECK-NEXT: i32 0, label [[BB2:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll @@ -129,11 +129,10 @@ ; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds double, double* [[INBUF:%.*]], i64 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[INBUF]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll @@ -110,17 +110,9 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 undef, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]] ; CHECK: if.then38: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> , double undef, i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> undef, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> undef, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> undef, [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> undef, [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> undef, [[TMP6]] ; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY_5_11_53_95_137_191_197_203_239_257_263_269_275_281_287_293_383_437_443_455_461_599_601:%.*]], %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>* +; CHECK-NEXT: store <2 x double> undef, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: br label [[RETURN:%.*]] ; CHECK: if.then78: ; CHECK-NEXT: br label [[RETURN]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle-inseltpoison.ll @@ -5,10 +5,9 @@ ; CHECK-LABEL: @g( ; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x i8> [[X:%.*]], i32 0 ; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x i8> [[Y:%.*]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> poison, i8 [[X0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[Y1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i8> [[TMP2]], [[TMP2]] -; CHECK-NEXT: ret <2 x i8> [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i8> [[X]], <2 x i8> [[Y]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i8> [[TMP1]], [[TMP1]] +; CHECK-NEXT: ret <2 x i8> [[TMP2]] ; %x0 = extractelement <2 x i8> %x, i32 0 %y1 = extractelement <2 x i8> %y, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle.ll @@ -5,10 +5,9 @@ ; CHECK-LABEL: @g( ; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x i8> [[X:%.*]], i32 0 ; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x i8> [[Y:%.*]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> poison, i8 [[X0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[Y1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i8> [[TMP2]], [[TMP2]] -; CHECK-NEXT: ret <2 x i8> [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i8> [[X]], <2 x i8> [[Y]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i8> [[TMP1]], [[TMP1]] +; CHECK-NEXT: ret <2 x i8> [[TMP2]] ; %x0 = extractelement <2 x i8> %x, i32 0 %y1 = extractelement <2 x i8> %y, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/extract.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract.ll @@ -54,14 +54,11 @@ ; CHECK-LABEL: @fextr2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load <4 x double>, <4 x double>* undef, align 32 -; CHECK-NEXT: [[V0:%.*]] = extractelement <4 x double> [[LD]], i32 0 -; CHECK-NEXT: [[V1:%.*]] = extractelement <4 x double> [[LD]], i32 1 ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V0]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[P0]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[LD]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[TMP0]], +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[P0]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP1]], <2 x double>* [[TMP2]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll @@ -82,23 +82,19 @@ ; CHECK-NEXT: ret float [[ADD]] ; ; THRESH1-LABEL: @f_used_twice_in_tree( -; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 -; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; THRESH1-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 -; THRESH1-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[X]], [[TMP3]] -; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 -; THRESH1-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 -; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]] +; THRESH1-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> +; THRESH1-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X]], [[TMP1]] +; THRESH1-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; THRESH1-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] ; THRESH1-NEXT: ret float [[ADD]] ; ; THRESH2-LABEL: @f_used_twice_in_tree( -; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 -; THRESH2-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; THRESH2-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 -; THRESH2-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[X]], [[TMP3]] -; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 -; THRESH2-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 -; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]] +; THRESH2-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> +; THRESH2-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X]], [[TMP1]] +; THRESH2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] ; THRESH2-NEXT: ret float [[ADD]] ; %x0 = extractelement <2 x float> %x, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -1334,8 +1334,8 @@ define void @PR49730() { ; CHECK-LABEL: @PR49730( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> ) -; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> poison, [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> undef, [[TMP1]] ; CHECK-NEXT: [[T12:%.*]] = sub nsw i32 undef, undef ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[T12]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll @@ -144,15 +144,14 @@ ; MINTREESIZE-NEXT: [[Q1:%.*]] = extractelement <4 x float> [[RD]], i32 1 ; MINTREESIZE-NEXT: [[Q2:%.*]] = extractelement <4 x float> [[RD]], i32 2 ; MINTREESIZE-NEXT: [[Q3:%.*]] = extractelement <4 x float> [[RD]], i32 3 -; MINTREESIZE-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[Q2]], i32 0 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[Q3]], i32 1 +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> poison, <2 x i32> ; MINTREESIZE-NEXT: [[Q4:%.*]] = fadd float [[Q0]], [[Q1]] ; MINTREESIZE-NEXT: [[Q5:%.*]] = fadd float [[Q2]], [[Q3]] -; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[Q4]], i32 0 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[Q5]], i32 1 +; MINTREESIZE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[Q4]], i32 0 +; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[Q5]], i32 1 ; MINTREESIZE-NEXT: [[Q6:%.*]] = fadd float [[Q4]], [[Q5]] -; MINTREESIZE-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[Q6]], i32 0 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[Q5]], i32 1 +; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[Q6]], i32 0 +; MINTREESIZE-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[Q5]], i32 1 ; MINTREESIZE-NEXT: [[QI:%.*]] = fcmp olt float [[Q6]], [[Q5]] ; MINTREESIZE-NEXT: call void @llvm.assume(i1 [[QI]]) ; MINTREESIZE-NEXT: ret <4 x float> undef @@ -283,25 +282,19 @@ ; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 ; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2 ; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[C0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[C2]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[C3]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <2 x i32> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[A0]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[A1]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[B0]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[B1]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[A2]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[B2]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP13]], <2 x float> [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP18]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP2]], <2 x float> [[TMP5]], <2 x float> [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP4]], <2 x float> [[TMP8]], <2 x float> [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP12]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[RD1]] ; %c0 = extractelement <4 x i32> %c, i32 0 @@ -435,24 +428,12 @@ ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @reschedule_extract( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B]], i32 2 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B]], i32 1 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[B]], i32 0 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[A]], i32 2 -; MINTREESIZE-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[A]], i32 1 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0 -; MINTREESIZE-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0 -; MINTREESIZE-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1 -; MINTREESIZE-NEXT: [[TMP11:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 -; MINTREESIZE-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP3]], i32 1 -; MINTREESIZE-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0 -; MINTREESIZE-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP2]], i32 1 -; MINTREESIZE-NEXT: [[TMP16:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0 -; MINTREESIZE-NEXT: [[TMP17:%.*]] = insertelement <2 x float> [[TMP16]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: ret <4 x float> [[TMP11]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -485,24 +466,12 @@ ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @take_credit( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B]], i32 2 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B]], i32 1 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[B]], i32 0 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[A]], i32 2 -; MINTREESIZE-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[A]], i32 1 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0 -; MINTREESIZE-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0 -; MINTREESIZE-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1 -; MINTREESIZE-NEXT: [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 -; MINTREESIZE-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP3]], i32 1 -; MINTREESIZE-NEXT: [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0 -; MINTREESIZE-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP2]], i32 1 -; MINTREESIZE-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0 -; MINTREESIZE-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP17:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP17]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -559,40 +528,16 @@ ; NOTHRESHOLD-NEXT: ret <8 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @_vadd256( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[B:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[B]], i32 6 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[B]], i32 5 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[B]], i32 4 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = extractelement <8 x float> [[B]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[B]], i32 2 -; MINTREESIZE-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[B]], i32 1 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[B]], i32 0 -; MINTREESIZE-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP10:%.*]] = extractelement <8 x float> [[A]], i32 6 -; MINTREESIZE-NEXT: [[TMP11:%.*]] = extractelement <8 x float> [[A]], i32 5 -; MINTREESIZE-NEXT: [[TMP12:%.*]] = extractelement <8 x float> [[A]], i32 4 -; MINTREESIZE-NEXT: [[TMP13:%.*]] = extractelement <8 x float> [[A]], i32 3 -; MINTREESIZE-NEXT: [[TMP14:%.*]] = extractelement <8 x float> [[A]], i32 2 -; MINTREESIZE-NEXT: [[TMP15:%.*]] = extractelement <8 x float> [[A]], i32 1 -; MINTREESIZE-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[A]], i32 0 -; MINTREESIZE-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP16]], i32 0 -; MINTREESIZE-NEXT: [[TMP18:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP8]], i32 1 -; MINTREESIZE-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i32 0 -; MINTREESIZE-NEXT: [[TMP20:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP7]], i32 1 -; MINTREESIZE-NEXT: [[TMP21:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i32 0 -; MINTREESIZE-NEXT: [[TMP22:%.*]] = insertelement <2 x float> [[TMP21]], float [[TMP6]], i32 1 -; MINTREESIZE-NEXT: [[TMP23:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0 -; MINTREESIZE-NEXT: [[TMP24:%.*]] = insertelement <2 x float> [[TMP23]], float [[TMP5]], i32 1 -; MINTREESIZE-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i32 0 -; MINTREESIZE-NEXT: [[TMP26:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP4]], i32 1 -; MINTREESIZE-NEXT: [[TMP27:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i32 0 -; MINTREESIZE-NEXT: [[TMP28:%.*]] = insertelement <2 x float> [[TMP27]], float [[TMP3]], i32 1 -; MINTREESIZE-NEXT: [[TMP29:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i32 0 -; MINTREESIZE-NEXT: [[TMP30:%.*]] = insertelement <2 x float> [[TMP29]], float [[TMP2]], i32 1 -; MINTREESIZE-NEXT: [[TMP31:%.*]] = insertelement <2 x float> poison, float [[TMP9]], i32 0 -; MINTREESIZE-NEXT: [[TMP32:%.*]] = insertelement <2 x float> [[TMP31]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP33:%.*]] = fadd <8 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <8 x float> [[TMP33]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP9:%.*]] = fadd <8 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <8 x float> [[TMP9]] ; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %b, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -179,15 +179,14 @@ ; MINTREESIZE-NEXT: [[Q1:%.*]] = extractelement <4 x float> [[RD]], i32 1 ; MINTREESIZE-NEXT: [[Q2:%.*]] = extractelement <4 x float> [[RD]], i32 2 ; MINTREESIZE-NEXT: [[Q3:%.*]] = extractelement <4 x float> [[RD]], i32 3 -; MINTREESIZE-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[Q2]], i32 0 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[Q3]], i32 1 +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> poison, <2 x i32> ; MINTREESIZE-NEXT: [[Q4:%.*]] = fadd float [[Q0]], [[Q1]] ; MINTREESIZE-NEXT: [[Q5:%.*]] = fadd float [[Q2]], [[Q3]] -; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[Q4]], i32 0 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[Q5]], i32 1 +; MINTREESIZE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[Q4]], i32 0 +; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[Q5]], i32 1 ; MINTREESIZE-NEXT: [[Q6:%.*]] = fadd float [[Q4]], [[Q5]] -; MINTREESIZE-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[Q6]], i32 0 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[Q5]], i32 1 +; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[Q6]], i32 0 +; MINTREESIZE-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[Q5]], i32 1 ; MINTREESIZE-NEXT: [[QI:%.*]] = fcmp olt float [[Q6]], [[Q5]] ; MINTREESIZE-NEXT: call void @llvm.assume(i1 [[QI]]) ; MINTREESIZE-NEXT: ret <4 x float> undef @@ -318,25 +317,19 @@ ; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 ; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2 ; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[C0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[C2]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[C3]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <2 x i32> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[A0]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[A1]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[B0]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[B1]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[A2]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[B2]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP13]], <2 x float> [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP18]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP2]], <2 x float> [[TMP5]], <2 x float> [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP4]], <2 x float> [[TMP8]], <2 x float> [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP12]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[RD1]] ; %c0 = extractelement <4 x i32> %c, i32 0 @@ -470,24 +463,12 @@ ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @reschedule_extract( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B]], i32 2 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B]], i32 1 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[B]], i32 0 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[A]], i32 2 -; MINTREESIZE-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[A]], i32 1 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0 -; MINTREESIZE-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0 -; MINTREESIZE-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1 -; MINTREESIZE-NEXT: [[TMP11:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 -; MINTREESIZE-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP3]], i32 1 -; MINTREESIZE-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0 -; MINTREESIZE-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP2]], i32 1 -; MINTREESIZE-NEXT: [[TMP16:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0 -; MINTREESIZE-NEXT: [[TMP17:%.*]] = insertelement <2 x float> [[TMP16]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: ret <4 x float> [[TMP11]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -520,24 +501,12 @@ ; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @take_credit( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B]], i32 2 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B]], i32 1 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[B]], i32 0 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[A]], i32 2 -; MINTREESIZE-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[A]], i32 1 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0 -; MINTREESIZE-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0 -; MINTREESIZE-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1 -; MINTREESIZE-NEXT: [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 -; MINTREESIZE-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP3]], i32 1 -; MINTREESIZE-NEXT: [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0 -; MINTREESIZE-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP2]], i32 1 -; MINTREESIZE-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0 -; MINTREESIZE-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP17:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP17]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -594,40 +563,16 @@ ; NOTHRESHOLD-NEXT: ret <8 x float> [[TMP1]] ; ; MINTREESIZE-LABEL: @_vadd256( -; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[B:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[B]], i32 6 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[B]], i32 5 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[B]], i32 4 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = extractelement <8 x float> [[B]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[B]], i32 2 -; MINTREESIZE-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[B]], i32 1 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[B]], i32 0 -; MINTREESIZE-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP10:%.*]] = extractelement <8 x float> [[A]], i32 6 -; MINTREESIZE-NEXT: [[TMP11:%.*]] = extractelement <8 x float> [[A]], i32 5 -; MINTREESIZE-NEXT: [[TMP12:%.*]] = extractelement <8 x float> [[A]], i32 4 -; MINTREESIZE-NEXT: [[TMP13:%.*]] = extractelement <8 x float> [[A]], i32 3 -; MINTREESIZE-NEXT: [[TMP14:%.*]] = extractelement <8 x float> [[A]], i32 2 -; MINTREESIZE-NEXT: [[TMP15:%.*]] = extractelement <8 x float> [[A]], i32 1 -; MINTREESIZE-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[A]], i32 0 -; MINTREESIZE-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP16]], i32 0 -; MINTREESIZE-NEXT: [[TMP18:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP8]], i32 1 -; MINTREESIZE-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i32 0 -; MINTREESIZE-NEXT: [[TMP20:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP7]], i32 1 -; MINTREESIZE-NEXT: [[TMP21:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i32 0 -; MINTREESIZE-NEXT: [[TMP22:%.*]] = insertelement <2 x float> [[TMP21]], float [[TMP6]], i32 1 -; MINTREESIZE-NEXT: [[TMP23:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0 -; MINTREESIZE-NEXT: [[TMP24:%.*]] = insertelement <2 x float> [[TMP23]], float [[TMP5]], i32 1 -; MINTREESIZE-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i32 0 -; MINTREESIZE-NEXT: [[TMP26:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP4]], i32 1 -; MINTREESIZE-NEXT: [[TMP27:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i32 0 -; MINTREESIZE-NEXT: [[TMP28:%.*]] = insertelement <2 x float> [[TMP27]], float [[TMP3]], i32 1 -; MINTREESIZE-NEXT: [[TMP29:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i32 0 -; MINTREESIZE-NEXT: [[TMP30:%.*]] = insertelement <2 x float> [[TMP29]], float [[TMP2]], i32 1 -; MINTREESIZE-NEXT: [[TMP31:%.*]] = insertelement <2 x float> poison, float [[TMP9]], i32 0 -; MINTREESIZE-NEXT: [[TMP32:%.*]] = insertelement <2 x float> [[TMP31]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP33:%.*]] = fadd <8 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <8 x float> [[TMP33]] +; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <2 x i32> +; MINTREESIZE-NEXT: [[TMP9:%.*]] = fadd <8 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <8 x float> [[TMP9]] ; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %b, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll @@ -16,12 +16,12 @@ ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP7]], <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> , <4 x float> [[TMP7]], <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP3]], i32 2 ; CHECK-NEXT: [[TMP10:%.*]] = fmul <4 x float> [[TMP6]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x float> poison, [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = fadd <4 x float> [[TMP11]], poison -; CHECK-NEXT: [[TMP13:%.*]] = fadd <4 x float> [[TMP12]], poison +; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x float> undef, [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = fadd <4 x float> [[TMP11]], undef +; CHECK-NEXT: [[TMP13:%.*]] = fadd <4 x float> [[TMP12]], undef ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[TMP13]], i32 0 ; CHECK-NEXT: [[VEC1:%.*]] = insertelement <2 x float> undef, float [[TMP14]], i32 0 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP13]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll @@ -52,7 +52,7 @@ ; CHECK-NEXT: ret void ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ poison, [[ENTRY]] ], [ [[TMP26]], [[FOR_INC]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ undef, [[ENTRY]] ], [ [[TMP26]], [[FOR_INC]] ] ; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll @@ -69,22 +69,15 @@ ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP7]], i32 2 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP9]], i32 3 -; CHECK-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[TMP2]], [[TMP10]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP2]], [[TMP3]] ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0 ; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1 ; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2 ; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP12]], align 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: ret i32 undef ; %in.addr = getelementptr inbounds i32, i32* %in, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll @@ -600,21 +600,16 @@ ; CHECK-NEXT: [[LOADVEC4:%.*]] = load <2 x double>, <2 x double>* [[VECPTR4:%.*]], align 4 ; CHECK-NEXT: [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0 ; CHECK-NEXT: [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[EXTRB0]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[EXTRA1]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP4]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP10]], double [[EXTRB1]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], [[TMP2]] -; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], [[TMP9]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[LOADVEC3]], <2 x double> [[LOADVEC2]], <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[LOADVEC]], <2 x double> [[LOADVEC4]], <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP7]], [[TMP5]] ; CHECK-NEXT: [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0 ; CHECK-NEXT: [[SIDX1:%.*]] = getelementptr inbounds double, double* [[STOREARRAY]], i64 1 -; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[SIDX0]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP13]], <2 x double>* [[TMP14]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[SIDX0]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8 ; CHECK-NEXT: ret void ; %idx0 = getelementptr inbounds double, double* %array, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll @@ -17,23 +17,17 @@ ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[SUB86_1]], i32 4 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[ADD78_2]], i32 5 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> poison, i32 [[SUB86_1]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD78_1]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[ADD94_1]], i32 2 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_1]], i32 3 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[ADD78_2]], i32 4 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[SUB102_3]], i32 5 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP13:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i32> [[TMP12]], <16 x i32> [[TMP13]], <16 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = lshr <16 x i32> [[TMP14]], -; CHECK-NEXT: [[TMP16:%.*]] = and <16 x i32> [[TMP15]], -; CHECK-NEXT: [[TMP17:%.*]] = mul nuw <16 x i32> [[TMP16]], -; CHECK-NEXT: [[TMP18:%.*]] = add <16 x i32> [[TMP17]], [[TMP14]] -; CHECK-NEXT: [[TMP19:%.*]] = xor <16 x i32> [[TMP18]], [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP19]]) -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP20]], 16 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[SHUFFLE]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> [[TMP7]], <16 x i32> [[TMP8]], <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = lshr <16 x i32> [[TMP9]], +; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i32> [[TMP10]], +; CHECK-NEXT: [[TMP12:%.*]] = mul nuw <16 x i32> [[TMP11]], +; CHECK-NEXT: [[TMP13:%.*]] = add <16 x i32> [[TMP12]], [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP13]], [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP14]]) +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP15]], 16 ; CHECK-NEXT: [[ADD119:%.*]] = add nuw nsw i32 undef, [[SHR]] ; CHECK-NEXT: [[SHR120:%.*]] = lshr i32 [[ADD119]], 1 ; CHECK-NEXT: ret i32 [[SHR120]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll @@ -13,13 +13,13 @@ ; CHECK: if.end: ; CHECK-NEXT: [[SUB14:%.*]] = sub nsw i32 [[Y_POS:%.*]], undef ; CHECK-NEXT: [[SHR15:%.*]] = ashr i32 [[SUB14]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[SHR15]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[SUB14]], i32 1 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[SHR15]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[SUB14]], i32 1 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP0]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], poison -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP3]], <4 x i32> poison +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[SHUFFLE]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], undef +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP3]], <4 x i32> undef ; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i32> [[TMP5]] to <4 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll @@ -244,27 +244,20 @@ ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP5]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP9]] = fmul <4 x float> [[TMP8]], +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP2:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2]] = fmul <4 x float> [[TMP1]], ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], 128 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP9]], i32 1 -; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[TMP10]], [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i32 2 -; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP9]], i32 3 -; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP13]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 +; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 +; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 +; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP6]] ; CHECK-NEXT: ret float [[ADD31]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll @@ -26,9 +26,9 @@ ; AVX-LABEL: @foo( ; AVX-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 0), align 16 ; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 2), align 8 -; AVX-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i32 0 -; AVX-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP2]], i32 1 -; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> +; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i32 0 +; AVX-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP2]], i32 1 +; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> ; AVX-NEXT: store <8 x i32> [[SHUFFLE]], <8 x i32>* bitcast ([8 x i32]* @a to <8 x i32>*), align 16 ; AVX-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll @@ -8,7 +8,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to float ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> undef, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = sitofp <2 x i32> [[TMP6]] to <2 x float> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP8]], <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll @@ -343,18 +343,15 @@ ; CHECK-NEXT: [[Y1:%.*]] = extractelement <8 x i32> [[Y]], i32 1 ; CHECK-NEXT: [[Y2:%.*]] = extractelement <8 x i32> [[Y]], i32 2 ; CHECK-NEXT: [[Y3:%.*]] = extractelement <8 x i32> [[Y]], i32 3 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[X0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[X1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[X2]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X3]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], ; CHECK-NEXT: [[D0:%.*]] = icmp slt i32 [[X0]], [[Y0]] ; CHECK-NEXT: [[D1:%.*]] = icmp slt i32 [[X1]], [[Y1]] ; CHECK-NEXT: [[D2:%.*]] = icmp slt i32 [[X2]], [[Y2]] ; CHECK-NEXT: [[D3:%.*]] = icmp slt i32 [[X3]], [[Y3]] -; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]]) -; CHECK-NEXT: [[S4:%.*]] = select i1 [[TMP7]], i1 [[D0]], i1 false +; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP3]]) +; CHECK-NEXT: [[S4:%.*]] = select i1 [[TMP4]], i1 [[D0]], i1 false ; CHECK-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false ; CHECK-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false ; CHECK-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll @@ -16,12 +16,10 @@ ; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 5 ; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 6 ; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 7 -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i16> [[LD]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[P0]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* [[TMP3]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[LD]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP1]], <8 x i16>* [[TMP2]], align 2 ; CHECK-NEXT: ret void ; ; YAML: Pass: slp-vectorizer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll @@ -15,9 +15,9 @@ ; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 4 ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 5 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], poison -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> poison, <4 x i32> [[SHUFFLE1]] -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> poison, <4 x i32> zeroinitializer, <4 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], undef +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> undef, <4 x i32> [[SHUFFLE1]] +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> [[TMP4]] ; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 6 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP27]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-alt-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-alt-shuffle.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-alt-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-alt-shuffle.ll @@ -17,9 +17,9 @@ ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[D:%.*]], i64 -1 ; CHECK-NEXT: [[ADD_PTR37:%.*]] = getelementptr inbounds float, float* [[D]], i64 -2 ; CHECK-NEXT: [[ADD_PTR45:%.*]] = getelementptr inbounds float, float* [[D]], i64 -3 -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> poison, [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> undef, [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = sitofp <4 x i32> [[TMP6]] to <4 x float> -; CHECK-NEXT: [[TMP8:%.*]] = fdiv <4 x float> [[TMP7]], poison +; CHECK-NEXT: [[TMP8:%.*]] = fdiv <4 x float> [[TMP7]], undef ; CHECK-NEXT: [[ADD_PTR53:%.*]] = getelementptr inbounds float, float* [[D]], i64 -4 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[ADD_PTR53]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP8]], <4 x float>* [[TMP9]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll @@ -12,28 +12,22 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1 ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP18:%.*]], [[BB3:%.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP10:%.*]], [[BB3:%.*]] ] ; CHECK-NEXT: [[TMP3:%.*]] = load double, double* undef, align 8 ; CHECK-NEXT: br i1 undef, label [[BB3]], label [[BB4:%.*]] ; CHECK: bb4: ; CHECK-NEXT: [[CONV2:%.*]] = uitofp i16 undef to double +; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[TMP3]], [[CONV2]] ; CHECK-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP2]] to <4 x double> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> , double [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> , double [[CONV2]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x double> poison, double [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x double> [[TMP11]], double [[TMP12]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = fcmp ogt <4 x double> [[TMP13]], [[TMP4]] -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = fptrunc <4 x double> [[TMP15]] to <4 x float> -; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP14]], <4 x float> [[TMP2]], <4 x float> [[TMP16]] +; CHECK-NEXT: [[SUB1:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> poison, double [[SUB1]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[ADD1]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = fcmp ogt <4 x double> [[TMP6]], [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = fptrunc <4 x double> [[TMP6]] to <4 x float> +; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP2]], <4 x float> [[TMP8]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP18]] = phi <4 x float> [ [[TMP17]], [[BB4]] ], [ [[TMP2]], [[BB2]] ] +; CHECK-NEXT: [[TMP10]] = phi <4 x float> [ [[TMP9]], [[BB4]] ], [ [[TMP2]], [[BB2]] ] ; CHECK-NEXT: br label [[BB2]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll --- a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll @@ -43,8 +43,8 @@ define void @test2() { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> ) -; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <4 x i32> poison, [[TMP0]] +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <4 x i32> undef, [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP2]], i32 77) ; CHECK-NEXT: [[E:%.*]] = icmp ugt i32 [[TMP3]], 1