diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6870,6 +6870,37 @@ return Cost; } +/// Checks if the buildvector last insertelement, with \p I as a part of this +/// buildvector, dominates all other uses of \p Root. +static bool dominatesAllUses(InsertElementInst *I, InsertElementInst *Root) { + if (I->getParent() != Root->getParent()) + return false; + // Find top insertelement of the buildvector. + Value *V = I; + InsertElementInst *Top = I; + while (auto *IE = dyn_cast(V)) { + if (!IE->hasOneUse()) + break; + Top = IE; + V = IE->user_back(); + } + if (Top->getParent() != Root->getParent()) + return false; + BasicBlock *BB = Top->getParent(); + unsigned Cnt = 0; + for (const User *U : Root->users()) { + auto *UInst = dyn_cast(U); + if (!UInst || UInst->getParent() != BB) + continue; + if (UInst->comesBefore(Top) || UInst == Top) { + ++Cnt; + if (Cnt > 1) + return false; + } + } + return true; +} + /// Check if two insertelement instructions are from the same buildvector. static bool areTwoInsertFromSameBuildVector( InsertElementInst *VU, InsertElementInst *V, @@ -6883,6 +6914,10 @@ // Multiple used inserts are separate nodes. if (!VU->hasOneUse() && !V->hasOneUse()) return false; + auto &&IsAllowedToBeInBuildVector = [](InsertElementInst *V, + InsertElementInst *TopI) { + return V->hasOneUse() || dominatesAllUses(TopI, V); + }; auto *IE1 = VU; auto *IE2 = V; unsigned Idx1 = *getInsertIndex(IE1); @@ -6892,9 +6927,9 @@ // IE1. do { if (IE2 == VU) - return VU->hasOneUse(); + return IsAllowedToBeInBuildVector(VU, V); if (IE1 == V) - return V->hasOneUse(); + return IsAllowedToBeInBuildVector(V, VU); if (IE1) { if ((IE1 != VU && !IE1->hasOneUse()) || getInsertIndex(IE1).value_or(Idx2) == Idx2) @@ -6944,7 +6979,8 @@ namespace { /// Returns incoming Value *, if the requested type is Value * too, or a default -/// value, otherwise. +/// value, otherwise. Also, if the requested type is Value * and the incoming +/// value is not Value *, returns nullptr. struct ValueSelect { template static std::enable_if_t::value, Value *> @@ -6952,6 +6988,11 @@ return V; } template + static std::enable_if_t::value, Value *> + get(U) { + return nullptr; + } + template static std::enable_if_t::value, U> get(Value *) { return U(); } @@ -6980,21 +7021,36 @@ SmallVector Mask(ShuffleMask.begin()->second); auto VMIt = std::next(ShuffleMask.begin()); T *Prev = nullptr; - bool IsBaseNotUndef = !isUndefVector(Base); + Value *FirstV = ValueSelect::get(ShuffleMask.begin()->first); + bool IsBaseNotUndef = + !(FirstV ? (isa(Base) || + (isUndefVector(Base) && isGuaranteedNotToBePoison(FirstV))) + : isUndefVector(Base)); if (IsBaseNotUndef) { // Base is not undef, need to combine it with the next subvectors. std::pair Res = ResizeAction(ShuffleMask.begin()->first, Mask); - for (unsigned Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) { - if (Mask[Idx] == UndefMaskElem) + bool IsCompleteIdentity = true; + for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) { + if (Mask[Idx] == UndefMaskElem) { Mask[Idx] = Idx; - else + IsCompleteIdentity = false; + } else { + IsCompleteIdentity &= (Res.second || Mask[Idx] == Idx); Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF; + } + } + if (IsCompleteIdentity) { + // Found complete identity, i.e. elements only from single vector are + // selected in the identity order. + Prev = Res.first; + std::iota(Mask.begin(), Mask.end(), 0); + } else { + auto *V = ValueSelect::get(Base); + (void)V; + assert((!V || GetVF(V) == Mask.size()) && + "Expected base vector of VF number of elements."); + Prev = Action(Mask, {nullptr, Res.first}); } - auto *V = ValueSelect::get(Base); - (void)V; - assert((!V || GetVF(V) == Mask.size()) && - "Expected base vector of VF number of elements."); - Prev = Action(Mask, {nullptr, Res.first}); } else if (ShuffleMask.size() == 1) { // Base is undef and only 1 vector is shuffled - perform the action only for // single vector, if the mask is not the identity mask. @@ -7807,43 +7863,63 @@ DenseMap UniquePositions; unsigned NumValues = std::distance(VL.begin(), find_if(reverse(VL), [](Value *V) { - return !isa(V); + return !isa(V); }).base()); - VF = std::max(VF, PowerOf2Ceil(NumValues)); - int UniqueVals = 0; + VF = std::min(VF, PowerOf2Ceil(NumValues)); + bool IsIdentity = true; + int UndefIdx = UndefMaskElem; + int ReplaceUndefIdx = UndefMaskElem; for (Value *V : VL.drop_back(VL.size() - VF)) { - if (isa(V)) { + if (isa(V)) { ReuseShuffleIndicies.emplace_back(UndefMaskElem); continue; } if (isConstant(V)) { - ReuseShuffleIndicies.emplace_back(UniqueValues.size()); - UniqueValues.emplace_back(V); + if (isa(V)) { + if (UndefIdx == UndefMaskElem) { + UndefIdx = UniqueValues.size(); + UniqueValues.emplace_back(V); + } + ReuseShuffleIndicies.emplace_back(UndefIdx); + } else { + ReuseShuffleIndicies.emplace_back(UniqueValues.size()); + UniqueValues.emplace_back(V); + } continue; } auto Res = UniquePositions.try_emplace(V, UniqueValues.size()); ReuseShuffleIndicies.emplace_back(Res.first->second); if (Res.second) { UniqueValues.emplace_back(V); - ++UniqueVals; - } - } - if (UniqueVals == 1 && UniqueValues.size() == 1) { - // Emit pure splat vector. - ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(), - UndefMaskElem); - } else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) { - if (UniqueValues.empty()) { - assert(all_of(VL, UndefValue::classof) && "Expected list of undefs."); - NumValues = VF; - } + if (ReplaceUndefIdx == UndefMaskElem) + ReplaceUndefIdx = Res.first->second; + } else { + IsIdentity = false; + } + } + // Check if undef values can be safely replaced by some non-const vals. + if (UndefIdx != UndefMaskElem && ReplaceUndefIdx != UndefMaskElem && + !IsIdentity) { + int MinIdx = std::min(ReplaceUndefIdx, UndefIdx); + for (unsigned I = 0; I < VF; ++I) + if (ReuseShuffleIndicies[I] == UndefIdx || + ReuseShuffleIndicies[I] == ReplaceUndefIdx) + ReuseShuffleIndicies[I] = MinIdx; + if (MinIdx != ReplaceUndefIdx) { + std::swap(UniqueValues[ReplaceUndefIdx], UniqueValues[UndefIdx]); + UndefIdx = ReplaceUndefIdx; + } + UniqueValues.erase(std::next(UniqueValues.begin(), UndefIdx)); + } + if (!IsIdentity) { + UniqueValues.append((UniqueValues.size() == 1 ? VL.size() : VF) - + UniqueValues.size(), + PoisonValue::get(VL[0]->getType())); + ReuseShuffleIndicies.append(VL.size() - VF, UndefMaskElem); + VL = UniqueValues; + } else { ReuseShuffleIndicies.clear(); - UniqueValues.clear(); - UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues)); } - UniqueValues.append(VF - UniqueValues.size(), - PoisonValue::get(VL[0]->getType())); - VL = UniqueValues; } ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq, @@ -8019,11 +8095,12 @@ } } - if ((!IsIdentity || Offset != 0 || - !isUndefVector(FirstInsert->getOperand(0))) && + bool IsUndefFirstOp = isUndefVector(FirstInsert->getOperand(0)); + if ((!IsIdentity || Offset != 0 || !IsUndefFirstOp) && NumElts != NumScalars) { - SmallVector InsertMask(NumElts); - std::iota(InsertMask.begin(), InsertMask.end(), 0); + SmallVector InsertMask(NumElts, UndefMaskElem); + if (!IsUndefFirstOp) + std::iota(InsertMask.begin(), InsertMask.end(), 0); for (unsigned I = 0; I < NumElts; I++) { if (Mask[I] != UndefMaskElem) InsertMask[Offset + I] = NumElts + I; @@ -8885,6 +8962,8 @@ PoisonValue::get(IE->getOperand(0)->getType())); IE->replaceUsesOfWith(IE->getOperand(1), PoisonValue::get(IE->getOperand(1)->getType())); + if (IE == FirstInsert && dominatesAllUses(LastInsert, FirstInsert)) + FirstInsert->replaceAllUsesWith(NewInst); eraseInstruction(IE); } CSEBlocks.insert(LastInsert->getParent()); @@ -8996,25 +9075,91 @@ return I1->isIdenticalTo(I2); if (SI1->isIdenticalTo(SI2)) return true; - for (int I = 0, E = SI1->getNumOperands(); I < E; ++I) - if (SI1->getOperand(I) != SI2->getOperand(I)) + for (int I = 0, E = SI1->getNumOperands(); I < E; ++I) { + Value *SIOp = SI1->getOperand(I); + if (isa(SIOp) && + SIOp->getType() == SI2->getOperand(I)->getType()) + continue; + if (!any_of(SI2->operands(), [SIOp](Value *Op) { return Op == SIOp; }) || + any_of(SI2->operands(), [SI1](Value *Op) { return Op == SI1; })) return false; + } // Check if the second instruction is more defined than the first one. NewMask.assign(SI2->getShuffleMask().begin(), SI2->getShuffleMask().end()); ArrayRef SM1 = SI1->getShuffleMask(); // Count trailing undefs in the mask to check the final number of used // registers. + auto &&IsStrictUndef = [](Value *V) { + return isa_and_nonnull(V) && !isa(V); + }; unsigned LastUndefsCnt = 0; + unsigned VF = cast(SI1->getOperand(0)->getType()) + ->getElementCount() + .getKnownMinValue(); + // Tries to fetch the vector element (scalar value) for \p VecOp using given + // index \p Idx. \p RootOp is used to stop lookup through shuffles, if \p + // RootOp is a root of the \p VecOp shuffle. + auto &&GetVecElem = [VF](Value *&VecOp, int Idx, Value *RootOp) -> Value * { + unsigned EVF = VF; + while (auto *SVOp = dyn_cast(VecOp)) { + VecOp = SVOp->getOperand(Idx / EVF); + Idx = SVOp->getMaskValue(Idx % EVF); + auto *FTy = dyn_cast(VecOp->getType()); + if (!FTy) + break; + EVF = FTy->getElementCount().getKnownMinValue(); + if (Idx == UndefMaskElem) + return PoisonValue::get( + cast(VecOp->getType())->getElementType()); + if (VecOp == RootOp) + break; + } + if (isa(VecOp)) + return PoisonValue::get( + cast(VecOp->getType())->getElementType()); + if (isa(VecOp)) + return UndefValue::get( + cast(VecOp->getType())->getElementType()); + if (auto *CV1 = dyn_cast(VecOp)) + return CV1->getAggregateElement(Idx % EVF); + return nullptr; + }; for (int I = 0, E = NewMask.size(); I < E; ++I) { if (SM1[I] == UndefMaskElem) ++LastUndefsCnt; else LastUndefsCnt = 0; - if (NewMask[I] != UndefMaskElem && SM1[I] != UndefMaskElem && - NewMask[I] != SM1[I]) - return false; - if (NewMask[I] == UndefMaskElem) + if (NewMask[I] == UndefMaskElem) { NewMask[I] = SM1[I]; + continue; + } + Value *VecOp1 = SI2->getOperand(NewMask[I] / VF); + unsigned VecOp2Pos = SM1[I] / VF; + Value *VecOp2 = + SM1[I] == UndefMaskElem ? nullptr : SI1->getOperand(VecOp2Pos); + if (SM1[I] == UndefMaskElem || (NewMask[I] == SM1[I] && VecOp1 == VecOp2)) + continue; + // Check if one mask can be safely replaced by another. + Value *CV1Elem = GetVecElem(VecOp1, NewMask[I], VecOp2); + Value *CV2Elem = GetVecElem(VecOp2, SM1[I], VecOp1); + if (isa_and_nonnull(CV1Elem) || + (IsStrictUndef(CV1Elem) && + (IsStrictUndef(CV2Elem) || + (CV2Elem && isGuaranteedNotToBePoison(CV2Elem)) || + (!CV2Elem && isGuaranteedNotToBePoison(VecOp2))))) { + NewMask[I] = SM1[I] % VF + VecOp2Pos * VF; + continue; + } + if ((CV1Elem && CV1Elem == CV2Elem) || + isa_and_nonnull(CV2Elem) || + (IsStrictUndef(CV2Elem) && + (IsStrictUndef(CV1Elem) || + (CV1Elem && isGuaranteedNotToBePoison(CV1Elem)) || + (!CV1Elem && isGuaranteedNotToBePoison(VecOp1))))) { + NewMask[I] = NewMask[I] % VF + VecOp2Pos * VF; + continue; + } + return false; } // Check if the last undefs actually change the final number of used vector // registers. diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll @@ -19,21 +19,19 @@ ; CHECK-LABEL: @s116_modified( ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 0 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, float* [[A]], i64 1 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, float* [[A]], i64 2 -; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds float, float* [[A]], i64 4 -; CHECK-NEXT: [[LD1:%.*]] = load float, float* [[GEP1]], align 4 +; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, float* [[A]], i64 3 ; CHECK-NEXT: [[LD0:%.*]] = load float, float* [[GEP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP2]] to <2 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP1]] to <2 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[LD4:%.*]] = load float, float* [[GEP4]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[LD4]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[LD1]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[LD1]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <4 x float> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[GEP3]] to <2 x float>* +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP4]], <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <4 x float> [[TMP9]], [[SHUFFLE]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[GEP0]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP10]], <4 x float>* [[TMP11]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll @@ -288,15 +288,15 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[V1_LANE_3]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> poison, double [[V2_LANE_2]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[V2_LANE_1]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[V2_LANE_2]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x double> [[TMP6]], double [[V2_LANE_0]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x double> [[TMP3]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> poison, <9 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[V2_LANE_0]], i32 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP3]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> poison, <9 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_0]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_3]]) -; CHECK-NEXT: store <9 x double> [[TMP9]], <9 x double>* [[PTR_1]], align 8 +; CHECK-NEXT: store <9 x double> [[TMP8]], <9 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll @@ -11,7 +11,7 @@ ; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float> ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> poison, <8 x i32> [[TMP6]], <8 x i32> +; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> poison, <8 x i32> [[TMP6]], <8 x i32> ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll @@ -11,7 +11,7 @@ ; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float> ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> undef, <8 x i32> [[TMP6]], <8 x i32> +; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> undef, <8 x i32> [[TMP6]], <8 x i32> ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll @@ -7,14 +7,14 @@ ; CHECK-NEXT: [[CALL:%.*]] = load i16, i16* undef, align 2 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> , i16 [[CALL37:%.*]], i32 4 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[CALL]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> , i16 [[CALL37]], i32 3 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[CALL37]], i32 6 -; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <8 x i16> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <8 x i16> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> [[TMP5]], <8 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = zext <8 x i1> [[TMP6]] to <8 x i16> -; CHECK-NEXT: [[TMP8:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[TMP7]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = add i16 [[TMP8]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> , i16 [[CALL37]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i16> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <8 x i16> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i1> [[TMP3]], <8 x i1> [[TMP4]], <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = zext <8 x i1> [[TMP5]] to <8 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[TMP6]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = add i16 [[TMP7]], 0 ; CHECK-NEXT: ret i16 [[OP_RDX]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll b/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll @@ -18,7 +18,7 @@ ; CHECK-NEXT: [[A0:%.*]] = load i32, i32* [[A:%.*]], align 8 ; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 0 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[A0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDXS0]] to <8 x i32>* ; CHECK-NEXT: store <8 x i32> [[SHUFFLE]], <8 x i32>* [[TMP1]], align 8 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll @@ -10,9 +10,10 @@ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP3]], <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0 ; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP2]], <2 x i32> ; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4 ; CHECK-NEXT: ret void ; @@ -43,10 +44,9 @@ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP3]], ptr null, align 4 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> ; CHECK-NEXT: store <2 x float> [[TMP8]], ptr null, align 4 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> -; CHECK-NEXT: store <2 x float> [[TMP9]], ptr null, align 4 ; CHECK-NEXT: ret void ; %1 = getelementptr inbounds float, ptr undef, i32 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll @@ -53,11 +53,9 @@ ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP7]], <2 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP9]], <2 x i32> -; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[TMP8]], 0 -; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[TMP10]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[TMP7]], 0 +; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[TMP8]], 1 ; CHECK-NEXT: ret { <2 x float>, <2 x float> } zeroinitializer ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll @@ -97,9 +97,9 @@ ; AVX-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] ; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i32 1 ; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[C]], i32 2 -; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 3 -; AVX-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP3]], [[TMP6]] -; AVX-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16 +; AVX-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> +; AVX-NEXT: [[TMP6:%.*]] = xor <4 x i32> [[TMP3]], [[SHUFFLE2]] +; AVX-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16 ; AVX-NEXT: ret void ; %add1 = add i32 %c, %a diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll @@ -13,19 +13,20 @@ ; CHECK-NEXT: [[RANGE20_I:%.*]] = getelementptr inbounds [[STRUCT_CLZMADEC_1_28_55_82_103_124_145_166_181_196_229_259_334:%.*]], %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* [[P:%.*]], i64 0, i32 4 ; CHECK-NEXT: br label [[DO_BODY66_I:%.*]] ; CHECK: do.body66.i: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP4:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> , <2 x i32> [[TMP1]], <2 x i32> ; CHECK-NEXT: br i1 undef, label [[DO_COND_I]], label [[IF_ELSE_I:%.*]] ; CHECK: if.else.i: -; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i32> [[TMP1]], undef +; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP1]], undef ; CHECK-NEXT: br label [[DO_COND_I]] ; CHECK: do.cond.i: -; CHECK-NEXT: [[TMP3]] = phi <2 x i32> [ [[TMP2]], [[IF_ELSE_I]] ], [ [[TMP1]], [[DO_BODY66_I]] ] +; CHECK-NEXT: [[TMP4]] = phi <2 x i32> [ [[TMP3]], [[IF_ELSE_I]] ], [ [[TMP2]], [[DO_BODY66_I]] ] ; CHECK-NEXT: br i1 undef, label [[DO_BODY66_I]], label [[DO_END1006_I:%.*]] ; CHECK: do.end1006.i: -; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[RANGE20_I]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[RANGE20_I]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP5]], <2 x i32>* [[TMP6]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll @@ -129,7 +129,7 @@ ; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds double, double* [[INBUF:%.*]], i64 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[INBUF]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> , <2 x double> [[TMP1]], <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll @@ -20,15 +20,16 @@ ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP6]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll @@ -6,17 +6,17 @@ ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> , i32 [[TMP7:%.*]], i32 2 ; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 undef, i32 4 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> , [[SHUFFLE]] -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> , [[SHUFFLE]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP5]], <8 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = add <8 x i32> zeroinitializer, [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP8]]) -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> zeroinitializer) -; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> , <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 undef, i32 6 +; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <8 x i32> , [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <8 x i32> , [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i32> zeroinitializer, [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]]) +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> zeroinitializer) +; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[TMP64:%.*]] = zext i32 [[OP_RDX]] to i64 ; CHECK-NEXT: ret i64 [[TMP64]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll @@ -305,7 +305,7 @@ ; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP18]], <4 x i32> +; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP18]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[RD1]] ; %c0 = extractelement <4 x i32> %c, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -44,7 +44,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]] ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <8 x i32> -; CHECK-NEXT: [[RD1:%.*]] = shufflevector <8 x float> undef, <8 x float> [[TMP3]], <8 x i32> +; CHECK-NEXT: [[RD1:%.*]] = shufflevector <8 x float> undef, <8 x float> [[TMP3]], <8 x i32> ; CHECK-NEXT: ret <8 x float> [[RD1]] ; %c0 = extractelement <4 x i32> %c, i32 0 @@ -340,7 +340,7 @@ ; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP18]], <4 x i32> +; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP18]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[RD1]] ; %c0 = extractelement <4 x i32> %c, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll @@ -12,10 +12,9 @@ ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[X]] to <2 x float>* ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 16 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP1]], i32 1 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> , float [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[SHUFFLE]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[TMP6]], undef ; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], undef ; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[TMP8]], undef diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll @@ -9,10 +9,11 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP0]], <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP0]], <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SHUFFLE]], <4 x i32> +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: store <4 x i32> [[SHUFFLE1]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll @@ -10,7 +10,7 @@ ; CHECK: bb2.loopexit: ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[SHUFFLE:%.*]], [[BB9:%.*]] ], [ poison, [[BB2_LOOPEXIT:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[TMP10:%.*]], [[BB9:%.*]] ], [ poison, [[BB2_LOOPEXIT:%.*]] ] ; CHECK-NEXT: ret void ; CHECK: bb3: ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP5:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ] @@ -34,20 +34,20 @@ ; CHECK-NEXT: br i1 poison, label [[BB7]], label [[BB6]] ; CHECK: bb9: ; CHECK-NEXT: [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ] -; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[SHUFFLE1:%.*]], [[BB10]] ], [ [[TMP11:%.*]], [[BB12]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[SHUFFLE:%.*]], [[BB10]] ], [ [[TMP12:%.*]], [[BB12]] ] ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[SHUFFLE]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP10]] = shufflevector <4 x i32> poison, <4 x i32> [[TMP9]], <4 x i32> ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb10: -; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ] ; CHECK-NEXT: [[LANDING_PAD68:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: cleanup -; CHECK-NEXT: [[SHUFFLE1]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[SHUFFLE]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: br label [[BB9]] ; CHECK: bb11: ; CHECK-NEXT: ret void ; CHECK: bb12: -; CHECK-NEXT: [[TMP11]] = phi <2 x i32> [ [[TMP6]], [[BB7]] ] +; CHECK-NEXT: [[TMP12]] = phi <2 x i32> [ [[TMP6]], [[BB7]] ] ; CHECK-NEXT: [[LANDING_PAD149:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: cleanup ; CHECK-NEXT: br label [[BB9]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll @@ -38,10 +38,11 @@ ; AVX-NEXT: [[T2:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[ARRAYIDX3]], align 2 ; AVX-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <8 x i32> -; AVX-NEXT: [[VECINIT2_I_I2:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> +; AVX-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> undef, <8 x i16> [[TMP3]], <8 x i32> +; AVX-NEXT: [[VECINIT2_I_I2:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP3]], <8 x i32> ; AVX-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I_I2]], i16 [[T2]], i64 3 -; AVX-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <8 x i32> -; AVX-NEXT: [[VECINIT7_I_I1:%.*]] = shufflevector <8 x i16> [[VECINIT3_I_I]], <8 x i16> [[TMP4]], <8 x i32> +; AVX-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <8 x i32> +; AVX-NEXT: [[VECINIT7_I_I1:%.*]] = shufflevector <8 x i16> [[VECINIT3_I_I]], <8 x i16> [[TMP5]], <8 x i32> ; AVX-NEXT: [[T7:%.*]] = bitcast <8 x i16> [[VECINIT7_I_I1]] to <2 x i64> ; AVX-NEXT: ret <2 x i64> [[T7]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll @@ -15,13 +15,13 @@ ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[ADD78_1]], i32 2 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[SUB86_1]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[ADD78_2]], i32 4 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[SUB86_1]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[ADD78_1]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD94_1]], i32 2 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SUB102_1]], i32 3 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_3]], i32 4 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] ; CHECK-NEXT: [[TMP11:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll @@ -17,23 +17,24 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[SUB14]], i32 1 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP0]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], undef -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP3]], <4 x i32> undef -; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i32> [[TMP5]] to <4 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 -; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP7]], i32 2 -; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP15]] +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> , i32 [[SHR15]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], undef +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP4]], <4 x i32> undef +; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i32> [[TMP6]] to <4 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 +; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2 +; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 +; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP16]] ; CHECK-NEXT: unreachable ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll @@ -14,7 +14,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -51,7 +51,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -88,7 +88,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -126,7 +126,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -164,7 +164,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -201,7 +201,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG2:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll @@ -9,7 +9,7 @@ ; CHECK-NEXT: br label [[T:%.*]] ; CHECK: t: ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* ; CHECK-NEXT: store <8 x i16> [[TMP0]], <8 x i16>* [[TMP1]], align 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll @@ -6,12 +6,12 @@ ; CHECK-NEXT: for.cond.preheader: ; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[FOR_INC_PREHEADER:%.*]] ; CHECK: for.inc.preheader: -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> , i32 [[TMP0:%.*]], i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> , i32 [[TMP0:%.*]], i32 6 ; CHECK-NEXT: br i1 false, label [[FOR_END]], label [[L1_PREHEADER:%.*]] ; CHECK: for.end: ; CHECK-NEXT: [[DOTPR:%.*]] = phi i32 [ 0, [[FOR_INC_PREHEADER]] ], [ 0, [[FOR_COND_PREHEADER:%.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[DOTPR]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[L1_PREHEADER]] ; CHECK: L1.preheader: ; CHECK-NEXT: [[TMP3:%.*]] = phi <8 x i32> [ [[SHUFFLE]], [[FOR_END]] ], [ [[TMP1]], [[FOR_INC_PREHEADER]] ] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll @@ -8,11 +8,11 @@ ; CHECK-NEXT: [[SUB:%.*]] = fsub float 6.553500e+04, undef ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float [[SUB]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> , float [[SUB]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1 ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP14:%.*]], [[BB3:%.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP15:%.*]], [[BB3:%.*]] ] ; CHECK-NEXT: [[TMP3:%.*]] = load double, double* undef, align 8 ; CHECK-NEXT: br i1 undef, label [[BB3]], label [[BB4:%.*]] ; CHECK: bb4: @@ -24,12 +24,13 @@ ; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <4 x double> [[TMP10]], [[TMP4]] -; CHECK-NEXT: [[TMP12:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float> -; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[TMP2]], <4 x float> [[TMP12]] +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> , <4 x double> [[TMP10]], <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = fcmp ogt <4 x double> [[TMP11]], [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float> +; CHECK-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP12]], <4 x float> [[TMP2]], <4 x float> [[TMP13]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP14]] = phi <4 x float> [ [[TMP13]], [[BB4]] ], [ [[TMP2]], [[BB2]] ] +; CHECK-NEXT: [[TMP15]] = phi <4 x float> [ [[TMP14]], [[BB4]] ], [ [[TMP2]], [[BB2]] ] ; CHECK-NEXT: br label [[BB2]] ; entry: