diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -217,15 +217,18 @@ return true; } +/// \returns True if the value is a constant (but not globals/constant +/// expressions). +static bool isConstant(Value *V) { + return isa(V) && !isa(V) && !isa(V); +} + /// \returns True if all of the values in \p VL are constants (but not /// globals/constant expressions). static bool allConstant(ArrayRef VL) { // Constant expressions and globals can't be vectorized like normal integer/FP // constants. - for (Value *i : VL) - if (!isa(i) || isa(i) || isa(i)) - return false; - return true; + return all_of(VL, isConstant); } /// \returns True if all of the values in \p VL are identical. @@ -4616,6 +4619,8 @@ // Iterate in reverse order to consider insert elements with the high cost. for (unsigned I = VL.size(); I > 0; --I) { unsigned Idx = I - 1; + if (isConstant(VL[Idx])) + continue; if (!UniqueElements.insert(VL[Idx]).second) ShuffledElements.insert(Idx); } @@ -4701,96 +4706,64 @@ } Value *BoUpSLP::gather(ArrayRef VL) { - Value *Val0 = - isa(VL[0]) ? cast(VL[0])->getValueOperand() : VL[0]; - FixedVectorType *VecTy = FixedVectorType::get(Val0->getType(), VL.size()); - Value *Vec = PoisonValue::get(VecTy); - unsigned InsIndex = 0; - for (Value *Val : VL) { - Vec = Builder.CreateInsertElement(Vec, Val, Builder.getInt32(InsIndex++)); + // List of instructions/lanes from current block and/or the blocks which are + // part of the current loop. These instructions will be inserted at the end to + // make it possible to optimize loops and hoist invariant instructions out of + // the loops body with better chances for success. + SmallVector, 4> PostponedInsts; + SmallSet PostponedIndices; + for (int I = 0, E = VL.size(); I < E; ++I) { + if (auto *Inst = dyn_cast(VL[I])) + if (Inst->getParent() == Builder.GetInsertBlock() && + PostponedIndices.insert(I).second) + PostponedInsts.emplace_back(Inst, I); + } + if (Loop *L = LI->getLoopFor(Builder.GetInsertBlock())) { + for (int I = 0, E = VL.size(); I < E; ++I) { + if (auto *Inst = dyn_cast(VL[I])) + if (L->contains(Inst) && PostponedIndices.insert(I).second) + PostponedInsts.emplace_back(Inst, I); + } + } + + auto &&CreateInsertElement = [this](Value *Vec, Value *V, unsigned Pos) { + // No need to insert undefs elements - exit. + if (isa(V)) + return Vec; + Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(Pos)); auto *InsElt = dyn_cast(Vec); if (!InsElt) - continue; + return Vec; GatherSeq.insert(InsElt); CSEBlocks.insert(InsElt->getParent()); // Add to our 'need-to-extract' list. - if (TreeEntry *Entry = getTreeEntry(Val)) { + if (TreeEntry *Entry = getTreeEntry(V)) { // Find which lane we need to extract. - int FoundLane = - findLaneForValue(Entry->Scalars, Entry->ReuseShuffleIndices, Val); - ExternalUses.push_back(ExternalUser(Val, InsElt, FoundLane)); - } - } - - return Vec; -} - -Value *BoUpSLP::vectorizeTree(ArrayRef VL) { - InstructionsState S = getSameOpcode(VL); - if (S.getOpcode()) { - if (TreeEntry *E = getTreeEntry(S.OpValue)) { - if (E->isSame(VL)) { - Value *V = vectorizeTree(E); - if (VL.size() == E->Scalars.size() && !E->ReuseShuffleIndices.empty()) { - // Reshuffle to get only unique values. - // If some of the scalars are duplicated in the vectorization tree - // entry, we do not vectorize them but instead generate a mask for the - // reuses. But if there are several users of the same entry, they may - // have different vectorization factors. This is especially important - // for PHI nodes. In this case, we need to adapt the resulting - // instruction for the user vectorization factor and have to reshuffle - // it again to take only unique elements of the vector. Without this - // code the function incorrectly returns reduced vector instruction - // with the same elements, not with the unique ones. - // block: - // %phi = phi <2 x > { .., %entry} {%shuffle, %block} - // %2 = shuffle <2 x > %phi, %poison, <4 x > <0, 0, 1, 1> - // ... (use %2) - // %shuffle = shuffle <2 x> %2, poison, <2 x> {0, 2} - // br %block - SmallVector UniqueIdxs; - SmallSet UsedIdxs; - int Pos = 0; - for (int Idx : E->ReuseShuffleIndices) { - if (UsedIdxs.insert(Idx).second) - UniqueIdxs.emplace_back(Pos); - ++Pos; - } - V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle"); - } - return V; + unsigned FoundLane = + std::distance(Entry->Scalars.begin(), find(Entry->Scalars, V)); + assert(FoundLane < Entry->Scalars.size() && "Couldn't find extract lane"); + if (!Entry->ReuseShuffleIndices.empty()) { + FoundLane = std::distance(Entry->ReuseShuffleIndices.begin(), + find(Entry->ReuseShuffleIndices, FoundLane)); } + ExternalUses.emplace_back(V, InsElt, FoundLane); } + return Vec; + }; + Value *Val0 = + isa(VL[0]) ? cast(VL[0])->getValueOperand() : VL[0]; + FixedVectorType *VecTy = FixedVectorType::get(Val0->getType(), VL.size()); + Value *Vec = PoisonValue::get(VecTy); + for (int I = 0, E = VL.size(); I < E; ++I) { + if (PostponedIndices.contains(I)) + continue; + Vec = CreateInsertElement(Vec, VL[I], I); } + // Append instructions, which are/may be part of the loop, in the end to make + // it possible to hoist non-loop-based instructions. + for (const std::pair &Pair : PostponedInsts) + Vec = CreateInsertElement(Vec, Pair.first, Pair.second); - // Check that every instruction appears once in this bundle. - SmallVector ReuseShuffleIndicies; - SmallVector UniqueValues; - if (VL.size() > 2) { - DenseMap UniquePositions; - for (Value *V : VL) { - auto Res = UniquePositions.try_emplace(V, UniqueValues.size()); - ReuseShuffleIndicies.emplace_back(Res.first->second); - if (Res.second || isa(V)) - UniqueValues.emplace_back(V); - } - // Do not shuffle single element or if number of unique values is not power - // of 2. - if (UniqueValues.size() == VL.size() || UniqueValues.size() <= 1 || - !llvm::isPowerOf2_32(UniqueValues.size())) - ReuseShuffleIndicies.clear(); - else - VL = UniqueValues; - } - - Value *Vec = gather(VL); - if (!ReuseShuffleIndicies.empty()) { - Vec = Builder.CreateShuffleVector(Vec, ReuseShuffleIndicies, "shuffle"); - if (auto *I = dyn_cast(Vec)) { - GatherSeq.insert(I); - CSEBlocks.insert(I->getParent()); - } - } return Vec; } @@ -4798,11 +4771,13 @@ /// Merges shuffle masks and emits final shuffle instruction, if required. class ShuffleInstructionBuilder { IRBuilderBase &Builder; + const unsigned VF = 0; bool IsFinalized = false; SmallVector Mask; public: - ShuffleInstructionBuilder(IRBuilderBase &Builder) : Builder(Builder) {} + ShuffleInstructionBuilder(IRBuilderBase &Builder, unsigned VF) + : Builder(Builder), VF(VF) {} /// Adds a mask, inverting it before applying. void addInversedMask(ArrayRef SubMask) { @@ -4829,8 +4804,9 @@ SmallVector NewMask(SubMask.size(), SubMask.size()); int TermValue = std::min(Mask.size(), SubMask.size()); for (int I = 0, E = SubMask.size(); I < E; ++I) { - if (SubMask[I] >= TermValue || Mask[SubMask[I]] >= TermValue) { - NewMask[I] = E; + if (SubMask[I] >= TermValue || SubMask[I] == UndefMaskElem || + Mask[SubMask[I]] >= TermValue) { + NewMask[I] = UndefMaskElem; continue; } NewMask[I] = Mask[SubMask[I]]; @@ -4840,7 +4816,15 @@ Value *finalize(Value *V) { IsFinalized = true; - if (Mask.empty()) + if (VF == cast(V->getType())->getNumElements() && + Mask.empty()) + return V; + SmallVector NormalizedMask(VF, UndefMaskElem); + std::iota(NormalizedMask.begin(), NormalizedMask.end(), 0); + addMask(NormalizedMask); + + if (VF == cast(V->getType())->getNumElements() && + ShuffleVectorInst::isIdentityMask(Mask)) return V; return Builder.CreateShuffleVector(V, Mask, "shuffle"); } @@ -4852,6 +4836,120 @@ }; } // namespace +Value *BoUpSLP::vectorizeTree(ArrayRef VL) { + unsigned VF = VL.size(); + InstructionsState S = getSameOpcode(VL); + if (S.getOpcode()) { + if (TreeEntry *E = getTreeEntry(S.OpValue)) + if (E->isSame(VL)) { + Value *V = vectorizeTree(E); + if (VF != cast(V->getType())->getNumElements()) { + if (!E->ReuseShuffleIndices.empty()) { + // Reshuffle to get only unique values. + // If some of the scalars are duplicated in the vectorization tree + // entry, we do not vectorize them but instead generate a mask for + // the reuses. But if there are several users of the same entry, + // they may have different vectorization factors. This is especially + // important for PHI nodes. In this case, we need to adapt the + // resulting instruction for the user vectorization factor and have + // to reshuffle it again to take only unique elements of the vector. + // Without this code the function incorrectly returns reduced vector + // instruction with the same elements, not with the unique ones. + + // block: + // %phi = phi <2 x > { .., %entry} {%shuffle, %block} + // %2 = shuffle <2 x > %phi, %poison, <4 x > <0, 0, 1, 1> + // ... (use %2) + // %shuffle = shuffle <2 x> %2, poison, <2 x> {0, 2} + // br %block + SmallVector UniqueIdxs; + SmallSet UsedIdxs; + int Pos = 0; + int Sz = VL.size(); + for (int Idx : E->ReuseShuffleIndices) { + if (Idx != Sz && UsedIdxs.insert(Idx).second) + UniqueIdxs.emplace_back(Pos); + ++Pos; + } + assert(VF >= UsedIdxs.size() && "Expected vectorization factor " + "less than original vector size."); + UniqueIdxs.append(VF - UsedIdxs.size(), UndefMaskElem); + V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle"); + } else { + assert(VF < cast(V->getType())->getNumElements() && + "Expected vectorization factor less " + "than original vector size."); + SmallVector UniformMask(VF, 0); + std::iota(UniformMask.begin(), UniformMask.end(), 0); + V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle"); + } + } + return V; + } + } + + // Check that every instruction appears once in this bundle. + SmallVector ReuseShuffleIndicies; + SmallVector UniqueValues; + if (VL.size() > 2) { + DenseMap UniquePositions; + unsigned NumValues = + std::distance(VL.begin(), find_if(reverse(VL), [](Value *V) { + return !isa(V); + }).base()); + VF = std::max(VF, PowerOf2Ceil(NumValues)); + int UniqueVals = 0; + bool HasUndefs = false; + for (Value *V : VL.drop_back(VL.size() - VF)) { + if (isa(V)) { + ReuseShuffleIndicies.emplace_back(UndefMaskElem); + HasUndefs = true; + continue; + } + if (isConstant(V)) { + ReuseShuffleIndicies.emplace_back(UniqueValues.size()); + UniqueValues.emplace_back(V); + continue; + } + auto Res = UniquePositions.try_emplace(V, UniqueValues.size()); + ReuseShuffleIndicies.emplace_back(Res.first->second); + if (Res.second) { + UniqueValues.emplace_back(V); + ++UniqueVals; + } + } + if (HasUndefs && UniqueVals == 1 && UniqueValues.size() == 1) { + // Emit pure splat vector. + // FIXME: why it is not identified as an identity. + unsigned NumUndefs = count(ReuseShuffleIndicies, UndefMaskElem); + if (NumUndefs == ReuseShuffleIndicies.size() - 1) + ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(), + UndefMaskElem); + else + ReuseShuffleIndicies.assign(VF, 0); + } else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) { + ReuseShuffleIndicies.clear(); + UniqueValues.clear(); + UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues)); + } + UniqueValues.append(VF - UniqueValues.size(), + UndefValue::get(VL[0]->getType())); + VL = UniqueValues; + } + + ShuffleInstructionBuilder ShuffleBuilder(Builder, VF); + Value *Vec = gather(VL); + if (!ReuseShuffleIndicies.empty()) { + ShuffleBuilder.addMask(ReuseShuffleIndicies); + Vec = ShuffleBuilder.finalize(Vec); + if (auto *I = dyn_cast(Vec)) { + GatherSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } + } + return Vec; +} + Value *BoUpSLP::vectorizeTree(TreeEntry *E) { IRBuilder<>::InsertPointGuard Guard(Builder); @@ -4860,8 +4958,11 @@ return E->VectorizedValue; } - ShuffleInstructionBuilder ShuffleBuilder(Builder); bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty(); + unsigned VF = E->Scalars.size(); + if (NeedToShuffleReuses) + VF = E->ReuseShuffleIndices.size(); + ShuffleInstructionBuilder ShuffleBuilder(Builder, VF); if (E->State == TreeEntry::NeedToGather) { setInsertPointAfterBundle(E); Value *Vec; diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement-inseltpoison.ll @@ -6,7 +6,7 @@ define <2 x float> @insertelement-fixed-vector() { ; CHECK-LABEL: @insertelement-fixed-vector( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x float> @llvm.fabs.v2f32(<2 x float> undef) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x float> @llvm.fabs.v2f32(<2 x float> poison) ; CHECK-NEXT: ret <2 x float> [[TMP1]] ; %f0 = tail call fast float @llvm.fabs.f32(float undef) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll @@ -6,7 +6,7 @@ define <2 x float> @insertelement-fixed-vector() { ; CHECK-LABEL: @insertelement-fixed-vector( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x float> @llvm.fabs.v2f32(<2 x float> undef) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x float> @llvm.fabs.v2f32(<2 x float> poison) ; CHECK-NEXT: ret <2 x float> [[TMP1]] ; %f0 = tail call fast float @llvm.fabs.f32(float undef) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll @@ -8,21 +8,21 @@ ; CHECK-NEXT: bb: ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[BB:%.*]] ], [ [[TMP11:%.*]], [[BB25:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ poison, [[BB:%.*]] ], [ [[TMP11:%.*]], [[BB25:%.*]] ] ; CHECK-NEXT: br i1 undef, label [[BB3:%.*]], label [[BB11:%.*]] ; CHECK: bb3: ; CHECK-NEXT: [[I4:%.*]] = zext i1 undef to i32 -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i16> [[TMP0]], undef +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i16> [[TMP0]], poison ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i16> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: br label [[BB25]] ; CHECK: bb11: ; CHECK-NEXT: [[I12:%.*]] = zext i1 undef to i32 -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i16> [[TMP0]], undef +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i16> [[TMP0]], poison ; CHECK-NEXT: [[TMP5:%.*]] = sext <2 x i16> [[TMP4]] to <2 x i64> -; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <2 x i64> undef, [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <2 x i64> poison, [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = zext <2 x i1> [[TMP6]] to <2 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = icmp ult <2 x i32> undef, [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult <2 x i32> poison, [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i1> [[TMP8]] to <2 x i32> ; CHECK-NEXT: br label [[BB25]] ; CHECK: bb25: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll @@ -232,9 +232,9 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[V1_LANE_3]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[V1_LANE_0]], i32 2 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[V1_LANE_1]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[V2_LANE_0]], i32 1 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> poison, double [[V2_LANE_2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[V2_LANE_0]], i32 1 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP3]], [[SHUFFLE]] ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> undef, <9 x i32> ; CHECK-NEXT: [[A_INS_31:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP7]], <9 x i32> @@ -358,36 +358,28 @@ ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_0]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[V2_LANE_2]], i32 1 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[V2_LANE_1]], i32 2 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x double> [[TMP10]], double [[V2_LANE_0]], i32 3 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x double> [[TMP11]], double [[V2_LANE_2]], i32 4 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> [[TMP12]], double [[V2_LANE_0]], i32 5 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V2_LANE_2]], i32 6 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V2_LANE_1]], i32 7 -; CHECK-NEXT: [[TMP16:%.*]] = fmul <8 x double> [[TMP7]], [[TMP15]] +; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE2]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x double> [[TMP16]], <8 x double> undef, <9 x i32> -; CHECK-NEXT: [[A_INS_72:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP17]], <9 x i32> -; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[A_INS_72]], double [[A_LANE_8]], i32 8 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_6]], i32 0 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_7]], i32 1 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_8]], i32 2 -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x double> [[TMP20]], double [[V1_LANE_0]], i32 3 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V1_LANE_1]], i32 4 -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V1_LANE_2]], i32 5 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <8 x double> [[TMP23]], double [[V1_LANE_3]], i32 6 -; CHECK-NEXT: [[TMP25:%.*]] = insertelement <8 x double> [[TMP24]], double [[V1_LANE_4]], i32 7 -; CHECK-NEXT: [[TMP26:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP27:%.*]] = insertelement <8 x double> [[TMP26]], double [[V2_LANE_1]], i32 1 -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <8 x double> [[TMP27]], double [[V2_LANE_0]], i32 2 -; CHECK-NEXT: [[TMP29:%.*]] = insertelement <8 x double> [[TMP28]], double [[V2_LANE_2]], i32 3 -; CHECK-NEXT: [[TMP30:%.*]] = insertelement <8 x double> [[TMP29]], double [[V2_LANE_1]], i32 4 -; CHECK-NEXT: [[TMP31:%.*]] = insertelement <8 x double> [[TMP30]], double [[V2_LANE_0]], i32 5 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <8 x double> [[TMP31]], double [[V2_LANE_2]], i32 6 -; CHECK-NEXT: [[TMP33:%.*]] = insertelement <8 x double> [[TMP32]], double [[V2_LANE_1]], i32 7 -; CHECK-NEXT: [[TMP34:%.*]] = fmul <8 x double> [[TMP25]], [[TMP33]] +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> undef, <9 x i32> +; CHECK-NEXT: [[A_INS_73:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP12]], <9 x i32> +; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[A_INS_73]], double [[A_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_6]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V1_LANE_7]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V1_LANE_8]], i32 2 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x double> [[TMP15]], double [[V1_LANE_0]], i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x double> [[TMP16]], double [[V1_LANE_1]], i32 4 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_2]], i32 5 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_3]], i32 6 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_4]], i32 7 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_2]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V2_LANE_1]], i32 1 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V2_LANE_0]], i32 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <8 x double> [[TMP34]], <8 x double> undef, <9 x i32> -; CHECK-NEXT: [[B_INS_71:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP35]], <9 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> undef, <9 x i32> +; CHECK-NEXT: [[B_INS_71:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP25]], <9 x i32> ; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[B_INS_71]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], <9 x double>* [[PTR_1]], align 8 @@ -485,28 +477,25 @@ ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_1]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[V2_LANE_0]], i32 1 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[V2_LANE_2]], i32 2 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x double> [[TMP10]], double [[V2_LANE_0]], i32 3 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x double> [[TMP11]], double [[V2_LANE_2]], i32 4 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> [[TMP12]], double [[V2_LANE_1]], i32 5 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V2_LANE_0]], i32 6 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V2_LANE_2]], i32 7 -; CHECK-NEXT: [[TMP16:%.*]] = fmul <8 x double> [[TMP7]], [[TMP15]] +; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE2]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_1]] -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x double> [[TMP16]], <8 x double> undef, <9 x i32> -; CHECK-NEXT: [[A_INS_72:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP17]], <9 x i32> -; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[A_INS_72]], double [[A_LANE_8]], i32 8 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_6]], i32 0 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_7]], i32 1 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_8]], i32 2 -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x double> [[TMP20]], double [[V1_LANE_0]], i32 3 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V1_LANE_1]], i32 4 -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V1_LANE_2]], i32 5 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <8 x double> [[TMP23]], double [[V1_LANE_3]], i32 6 -; CHECK-NEXT: [[TMP25:%.*]] = insertelement <8 x double> [[TMP24]], double [[V1_LANE_4]], i32 7 -; CHECK-NEXT: [[TMP26:%.*]] = fmul <8 x double> [[TMP25]], [[TMP15]] +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> undef, <9 x i32> +; CHECK-NEXT: [[A_INS_73:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP12]], <9 x i32> +; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[A_INS_73]], double [[A_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_6]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V1_LANE_7]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V1_LANE_8]], i32 2 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x double> [[TMP15]], double [[V1_LANE_0]], i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x double> [[TMP16]], double [[V1_LANE_1]], i32 4 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_2]], i32 5 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_3]], i32 6 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_4]], i32 7 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <8 x double> [[TMP26]], <8 x double> undef, <9 x i32> -; CHECK-NEXT: [[B_INS_71:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP27]], <9 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> undef, <9 x i32> +; CHECK-NEXT: [[B_INS_71:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP22]], <9 x i32> ; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[B_INS_71]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], <9 x double>* [[PTR_1]], align 8 @@ -604,36 +593,28 @@ ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_0]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[V2_LANE_2]], i32 1 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[V2_LANE_1]], i32 2 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x double> [[TMP10]], double [[V2_LANE_2]], i32 3 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x double> [[TMP11]], double [[V2_LANE_1]], i32 4 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> [[TMP12]], double [[V2_LANE_0]], i32 5 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V2_LANE_2]], i32 6 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V2_LANE_1]], i32 7 -; CHECK-NEXT: [[TMP16:%.*]] = fmul <8 x double> [[TMP7]], [[TMP15]] +; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE2]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x double> [[TMP16]], <8 x double> undef, <9 x i32> -; CHECK-NEXT: [[A_INS_72:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP17]], <9 x i32> -; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[A_INS_72]], double [[A_LANE_8]], i32 8 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_7]], i32 0 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_6]], i32 1 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_8]], i32 2 -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x double> [[TMP20]], double [[V1_LANE_1]], i32 3 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V1_LANE_0]], i32 4 -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V1_LANE_3]], i32 5 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <8 x double> [[TMP23]], double [[V1_LANE_2]], i32 6 -; CHECK-NEXT: [[TMP25:%.*]] = insertelement <8 x double> [[TMP24]], double [[V1_LANE_5]], i32 7 -; CHECK-NEXT: [[TMP26:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP27:%.*]] = insertelement <8 x double> [[TMP26]], double [[V2_LANE_1]], i32 1 -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <8 x double> [[TMP27]], double [[V2_LANE_0]], i32 2 -; CHECK-NEXT: [[TMP29:%.*]] = insertelement <8 x double> [[TMP28]], double [[V2_LANE_2]], i32 3 -; CHECK-NEXT: [[TMP30:%.*]] = insertelement <8 x double> [[TMP29]], double [[V2_LANE_0]], i32 4 -; CHECK-NEXT: [[TMP31:%.*]] = insertelement <8 x double> [[TMP30]], double [[V2_LANE_2]], i32 5 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <8 x double> [[TMP31]], double [[V2_LANE_1]], i32 6 -; CHECK-NEXT: [[TMP33:%.*]] = insertelement <8 x double> [[TMP32]], double [[V2_LANE_0]], i32 7 -; CHECK-NEXT: [[TMP34:%.*]] = fmul <8 x double> [[TMP25]], [[TMP33]] +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> undef, <9 x i32> +; CHECK-NEXT: [[A_INS_73:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP12]], <9 x i32> +; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[A_INS_73]], double [[A_LANE_8]], i32 8 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_7]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V1_LANE_6]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V1_LANE_8]], i32 2 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x double> [[TMP15]], double [[V1_LANE_1]], i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x double> [[TMP16]], double [[V1_LANE_0]], i32 4 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_3]], i32 5 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_2]], i32 6 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_5]], i32 7 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_2]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V2_LANE_1]], i32 1 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V2_LANE_0]], i32 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_4]], [[V2_LANE_2]] -; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <8 x double> [[TMP34]], <8 x double> undef, <9 x i32> -; CHECK-NEXT: [[B_INS_71:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP35]], <9 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> undef, <9 x i32> +; CHECK-NEXT: [[B_INS_71:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP25]], <9 x i32> ; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[B_INS_71]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], <9 x double>* [[PTR_1]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll @@ -19,7 +19,7 @@ ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[ADD277]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP1]], i32 2 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP2]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> undef, [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> poison, [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = ashr <4 x i32> [[TMP7]], ; CHECK-NEXT: [[ARRAYIDX372_3:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 3 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX372]] to <4 x i32>* diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll @@ -7,7 +7,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP10:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[SHUFFLE]], @@ -39,22 +39,21 @@ ; CHECK-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]] ; CHECK-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]] ; CHECK-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[OP_EXTRA26]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 14910, i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i32> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> , i32 [[OP_EXTRA26]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = and <2 x i32> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP10]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1 ; CHECK-NEXT: br label [[LOOP]] ; ; FORCE_REDUCTION-LABEL: @Test( ; FORCE_REDUCTION-NEXT: entry: ; FORCE_REDUCTION-NEXT: br label [[LOOP:%.*]] ; FORCE_REDUCTION: loop: -; FORCE_REDUCTION-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP13:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] +; FORCE_REDUCTION-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP12:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] ; FORCE_REDUCTION-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> ; FORCE_REDUCTION-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1 ; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], @@ -96,11 +95,10 @@ ; FORCE_REDUCTION-NEXT: [[VAL_41:%.*]] = add i32 [[TMP2]], 13685 ; FORCE_REDUCTION-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[VAL_40]], i32 0 ; FORCE_REDUCTION-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP2]], i32 1 -; FORCE_REDUCTION-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[VAL_41]], i32 0 -; FORCE_REDUCTION-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 14910, i32 1 -; FORCE_REDUCTION-NEXT: [[TMP11:%.*]] = and <2 x i32> [[TMP8]], [[TMP10]] -; FORCE_REDUCTION-NEXT: [[TMP12:%.*]] = add <2 x i32> [[TMP8]], [[TMP10]] -; FORCE_REDUCTION-NEXT: [[TMP13]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> +; FORCE_REDUCTION-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> , i32 [[VAL_41]], i32 0 +; FORCE_REDUCTION-NEXT: [[TMP10:%.*]] = and <2 x i32> [[TMP8]], [[TMP9]] +; FORCE_REDUCTION-NEXT: [[TMP11:%.*]] = add <2 x i32> [[TMP8]], [[TMP9]] +; FORCE_REDUCTION-NEXT: [[TMP12]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> ; FORCE_REDUCTION-NEXT: br label [[LOOP]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll @@ -67,9 +67,9 @@ ; AVX-NEXT: [[TMP14:%.*]] = insertelement <16 x i8> [[TMP13]], i8 [[C]], i32 13 ; AVX-NEXT: [[TMP15:%.*]] = insertelement <16 x i8> [[TMP14]], i8 [[C]], i32 14 ; AVX-NEXT: [[TMP16:%.*]] = insertelement <16 x i8> [[TMP15]], i8 [[C]], i32 15 -; AVX-NEXT: [[TMP17:%.*]] = insertelement <2 x i8> poison, i8 [[A:%.*]], i32 0 -; AVX-NEXT: [[TMP18:%.*]] = insertelement <2 x i8> [[TMP17]], i8 [[B:%.*]], i32 1 -; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i8> [[TMP18]], <2 x i8> poison, <16 x i32> +; AVX-NEXT: [[TMP17:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0 +; AVX-NEXT: [[TMP18:%.*]] = insertelement <16 x i8> [[TMP17]], i8 [[B:%.*]], i32 1 +; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[TMP18]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: [[TMP19:%.*]] = xor <16 x i8> [[TMP16]], [[SHUFFLE]] ; AVX-NEXT: store <16 x i8> [[TMP19]], <16 x i8>* bitcast ([32 x i8]* @cle to <16 x i8>*), align 16 ; AVX-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll @@ -35,9 +35,8 @@ ; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double undef, i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> , double [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = fmul fast <2 x double> [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[TMP13]], [[TMP14]] ; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [ ; CHECK-NEXT: i32 0, label [[BB2:%.*]] ; CHECK-NEXT: ] @@ -46,7 +45,7 @@ ; CHECK: bb2: ; CHECK-NEXT: br label [[LABEL]] ; CHECK: label: -; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x double> [ [[TMP12]], [[BB1]] ], [ [[TMP16]], [[BB2]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x double> [ [[TMP12]], [[BB1]] ], [ [[TMP15]], [[BB2]] ] ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll @@ -131,10 +131,9 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double undef, i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll @@ -97,10 +97,10 @@ ; CHECK-NEXT: [[TMP2:%.*]] = load double, double* undef, align 8 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[TMP]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], undef +; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], poison ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_HOGE:%.*]], %struct.hoge* [[ARG:%.*]], i64 0, i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], undef -; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], poison +; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], poison ; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TMP7]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 ; CHECK-NEXT: br i1 undef, label [[BB11:%.*]], label [[BB12:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll @@ -31,15 +31,14 @@ ; CHECK: cond.false66.us: ; CHECK-NEXT: [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, undef ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[ADD_I276_US]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double undef, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> undef, [[TMP2]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[AGG_TMP99208_SROA_0_0_IDX]] to <2 x double>* +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[TMP0]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> poison, [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[AGG_TMP99208_SROA_0_0_IDX]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[AGG_TMP101211_SROA_0_0_IDX]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[AGG_TMP101211_SROA_0_0_IDX]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP7]], align 8 ; CHECK-NEXT: unreachable ; CHECK: cond.true63.us: ; CHECK-NEXT: unreachable @@ -111,14 +110,14 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 undef, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]] ; CHECK: if.then38: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> , double undef, i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> undef, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> undef, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> undef, [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> undef, [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> undef, [[TMP6]] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double poison, i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> poison, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> poison, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> poison, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> poison, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> poison, [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> poison, [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> poison, [[TMP6]] ; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY_5_11_53_95_137_191_197_203_239_257_263_269_275_281_287_293_383_437_443_455_461_599_601:%.*]], %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll @@ -12,7 +12,7 @@ ; CHECK-NEXT: br i1 [[TOBOOL_NOT19]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[C_022:%.*]] = phi i32* [ [[C_022_BE:%.*]], [[WHILE_BODY_BACKEDGE:%.*]] ], [ undef, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32*> [ [[TMP14:%.*]], [[WHILE_BODY_BACKEDGE]] ], [ undef, [[ENTRY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32*> [ [[TMP14:%.*]], [[WHILE_BODY_BACKEDGE]] ], [ poison, [[ENTRY]] ] ; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[C_022]], i64 1 ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint i32* [[C_022]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll b/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll @@ -16,9 +16,9 @@ define i32 @foo(i32* nocapture %A, i32 %n, i32 %k) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[N:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[K:%.*]], i32 1 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[K:%.*]], i32 1 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_024:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD10:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -1026,12 +1026,12 @@ ; THRESH-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]] ; THRESH-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]] ; THRESH-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]] -; THRESH-NEXT: [[TMP13:%.*]] = insertelement <2 x i1> poison, i1 [[TMP12]], i32 0 -; THRESH-NEXT: [[TMP14:%.*]] = insertelement <2 x i1> [[TMP13]], i1 [[TMP5]], i32 1 -; THRESH-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 -; THRESH-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP3]], i32 1 -; THRESH-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0 -; THRESH-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> [[TMP17]], i32 [[TMP4]], i32 1 +; THRESH-NEXT: [[TMP13:%.*]] = insertelement <2 x i1> poison, i1 [[TMP5]], i32 1 +; THRESH-NEXT: [[TMP14:%.*]] = insertelement <2 x i1> [[TMP13]], i1 [[TMP12]], i32 0 +; THRESH-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i32 1 +; THRESH-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP11]], i32 0 +; THRESH-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 1 +; THRESH-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> [[TMP17]], i32 [[TMP8]], i32 0 ; THRESH-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP14]], <2 x i32> [[TMP16]], <2 x i32> [[TMP18]] ; THRESH-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1 ; THRESH-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0 @@ -1334,8 +1334,8 @@ define void @PR49730() { ; CHECK-LABEL: @PR49730( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> ) -; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> undef, [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> poison, [[TMP1]] ; CHECK-NEXT: [[T12:%.*]] = sub nsw i32 undef, undef ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[T12]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll @@ -11,12 +11,9 @@ ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 8, i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP6]], <4 x i32> -; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> , i32 [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> +; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll @@ -52,7 +52,7 @@ ; CHECK-NEXT: ret void ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ undef, [[ENTRY]] ], [ [[TMP26]], [[FOR_INC]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ poison, [[ENTRY]] ], [ [[TMP26]], [[FOR_INC]] ] ; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll @@ -11,45 +11,29 @@ ; CHECK-NEXT: [[ADD78_2:%.*]] = add nsw i32 undef, undef ; CHECK-NEXT: [[SUB102_3:%.*]] = sub nsw i32 undef, undef ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[SUB102_3]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 undef, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[SUB102_1]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 undef, i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 undef, i32 4 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 undef, i32 5 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 undef, i32 6 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD94_1]], i32 7 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[ADD78_1]], i32 8 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB86_1]], i32 9 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 undef, i32 10 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[ADD78_2]], i32 11 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 undef, i32 12 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 undef, i32 13 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <16 x i32> [[TMP13]], i32 undef, i32 14 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 undef, i32 15 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x i32> , i32 [[SUB86_1]], i32 2 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <16 x i32> [[TMP16]], i32 undef, i32 3 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <16 x i32> [[TMP17]], i32 undef, i32 4 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x i32> [[TMP18]], i32 undef, i32 5 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <16 x i32> [[TMP19]], i32 undef, i32 6 -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <16 x i32> [[TMP20]], i32 [[ADD78_1]], i32 7 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <16 x i32> [[TMP21]], i32 [[ADD94_1]], i32 8 -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <16 x i32> [[TMP22]], i32 [[SUB102_1]], i32 9 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <16 x i32> [[TMP23]], i32 [[ADD78_2]], i32 10 -; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x i32> [[TMP24]], i32 undef, i32 11 -; CHECK-NEXT: [[TMP26:%.*]] = insertelement <16 x i32> [[TMP25]], i32 undef, i32 12 -; CHECK-NEXT: [[TMP27:%.*]] = insertelement <16 x i32> [[TMP26]], i32 undef, i32 13 -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <16 x i32> [[TMP27]], i32 undef, i32 14 -; CHECK-NEXT: [[TMP29:%.*]] = insertelement <16 x i32> [[TMP28]], i32 [[SUB102_3]], i32 15 -; CHECK-NEXT: [[TMP30:%.*]] = add nsw <16 x i32> [[TMP15]], [[TMP29]] -; CHECK-NEXT: [[TMP31:%.*]] = sub nsw <16 x i32> [[TMP15]], [[TMP29]] -; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <16 x i32> [[TMP30]], <16 x i32> [[TMP31]], <16 x i32> -; CHECK-NEXT: [[TMP33:%.*]] = lshr <16 x i32> [[TMP32]], -; CHECK-NEXT: [[TMP34:%.*]] = and <16 x i32> [[TMP33]], -; CHECK-NEXT: [[TMP35:%.*]] = mul nuw <16 x i32> [[TMP34]], -; CHECK-NEXT: [[TMP36:%.*]] = add <16 x i32> [[TMP35]], [[TMP32]] -; CHECK-NEXT: [[TMP37:%.*]] = xor <16 x i32> [[TMP36]], [[TMP35]] -; CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP37]]) -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP38]], 16 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[SUB102_1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[ADD94_1]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[ADD78_1]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[SUB86_1]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[ADD78_2]], i32 5 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> poison, i32 [[SUB86_1]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD78_1]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[ADD94_1]], i32 2 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_1]], i32 3 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[ADD78_2]], i32 4 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[SUB102_3]], i32 5 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP13:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i32> [[TMP12]], <16 x i32> [[TMP13]], <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = lshr <16 x i32> [[TMP14]], +; CHECK-NEXT: [[TMP16:%.*]] = and <16 x i32> [[TMP15]], +; CHECK-NEXT: [[TMP17:%.*]] = mul nuw <16 x i32> [[TMP16]], +; CHECK-NEXT: [[TMP18:%.*]] = add <16 x i32> [[TMP17]], [[TMP14]] +; CHECK-NEXT: [[TMP19:%.*]] = xor <16 x i32> [[TMP18]], [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP19]]) +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP20]], 16 ; CHECK-NEXT: [[ADD119:%.*]] = add nuw nsw i32 undef, [[SHR]] ; CHECK-NEXT: [[SHR120:%.*]] = lshr i32 [[ADD119]], 1 ; CHECK-NEXT: ret i32 [[SHR120]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll @@ -13,31 +13,27 @@ ; CHECK: if.end: ; CHECK-NEXT: [[SUB14:%.*]] = sub nsw i32 [[Y_POS:%.*]], undef ; CHECK-NEXT: [[SHR15:%.*]] = ashr i32 [[SUB14]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[SHR15]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[SUB14]], i32 1 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[SHR15]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[SUB14]], i32 1 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[SHR15]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 undef, i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 undef, i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 undef, i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP6]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[TMP7]], undef -; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> undef -; CHECK-NEXT: [[TMP10:%.*]] = sext <4 x i32> [[TMP9]] to <4 x i64> -; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i64> [[TMP10]] to <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP0]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], poison +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP3]], <4 x i32> poison +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i32> [[TMP5]] to <4 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 +; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP7]], i32 2 ; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP11]], i32 1 +; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP7]], i32 3 ; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP15]] -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP11]], i32 2 -; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i32> [[TMP11]], i32 3 -; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP18]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP19]] +; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP15]] ; CHECK-NEXT: unreachable ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll @@ -14,12 +14,12 @@ ; CHECK-LABEL: @Rf_GReset( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load double, double* @d, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> , double [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> , [[TMP1]] ; CHECK-NEXT: br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label [[IF_THEN:%.*]], label [[IF_END7:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef -; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], undef +; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], poison +; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], poison ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 ; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP5]], [[TMP6]] @@ -55,12 +55,12 @@ ; CHECK-LABEL: @Rf_GReset_unary_fneg( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load double, double* @d, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> , double [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = fneg <2 x double> [[TMP1]] ; CHECK-NEXT: br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label [[IF_THEN:%.*]], label [[IF_END7:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef -; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], undef +; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], poison +; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], poison ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 ; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP5]], [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll @@ -12,12 +12,12 @@ ; CHECK-NEXT: invoke void @foo() ; CHECK-NEXT: to label [[DONE:%.*]] unwind label [[LPAD]] ; CHECK: lpad: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x double> [ undef, [[ENTRY:%.*]] ], [ undef, [[INNER]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x double> [ poison, [[ENTRY:%.*]] ], [ poison, [[INNER]] ] ; CHECK-NEXT: [[TMP1:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: catch i8* null ; CHECK-NEXT: br label [[DONE]] ; CHECK: done: -; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ undef, [[INNER]] ], [ [[TMP0]], [[LPAD]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ poison, [[INNER]] ], [ [[TMP0]], [[LPAD]] ] ; CHECK-NEXT: ret void ; bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll @@ -103,11 +103,11 @@ ; AVX-NEXT: [[ADD:%.*]] = add i64 undef, undef ; AVX-NEXT: store i64 [[ADD]], i64* undef, align 1 ; AVX-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 5 -; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> , i64 [[TMP0]], i32 1 +; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i32 1 ; AVX-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], ; AVX-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], ; AVX-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 4 -; AVX-NEXT: [[TMP4:%.*]] = add nuw nsw <2 x i64> [[TMP3]], zeroinitializer +; AVX-NEXT: [[TMP4:%.*]] = add nuw nsw <2 x i64> [[TMP3]], poison ; AVX-NEXT: [[ARRAYIDX2_5:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 1 ; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 ; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll @@ -88,8 +88,8 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[B:%.*]] ; CHECK-NEXT: [[MUL:%.*]] = fmul double [[A:%.*]], 2.000000e+00 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[FNEG]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[C:%.*]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[FNEG]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP1]], [[TMP3]] @@ -137,8 +137,8 @@ ; CHECK-LABEL: @fcmp_lt( ; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[B:%.*]] ; CHECK-NEXT: [[MUL:%.*]] = fmul double [[A:%.*]], 2.000000e+00 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[FNEG]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[C:%.*]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[FNEG]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[B]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP2]], [[TMP4]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll @@ -32,12 +32,10 @@ ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[V5]], i32 5 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i16> [[TMP5]], i16 [[V6]], i32 6 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[V7]], i32 7 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i16> poison, i16 [[V0]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i16> [[TMP8]], i16 undef, i32 1 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP9]], <2 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i16> [[TMP7]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16* [[P0]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP10]], <8 x i16>* [[TMP11]], align 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i16> [[TMP7]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP8]], <8 x i16>* [[TMP9]], align 2 ; CHECK-NEXT: ret void ; ; YAML: Pass: slp-vectorizer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll @@ -12,22 +12,21 @@ ; CHECK: bb2: ; CHECK-NEXT: [[T:%.*]] = select i1 undef, i16 undef, i16 15 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> poison, i16 [[T]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> [[TMP0]], i16 undef, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <2 x i32> , [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], undef -; CHECK-NEXT: [[SHUFFLE10:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE10]], -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]]) -; CHECK-NEXT: [[T19:%.*]] = select i1 undef, i32 [[TMP6]], i32 undef +; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i16> [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], poison +; CHECK-NEXT: [[SHUFFLE10:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[SHUFFLE10]], +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[T19:%.*]] = select i1 undef, i32 [[TMP5]], i32 undef ; CHECK-NEXT: [[T20:%.*]] = icmp sgt i32 [[T19]], 63 -; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP2]] -; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP9]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = icmp slt i32 [[TMP10]], undef -; CHECK-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP10]], i32 undef +; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <2 x i32> poison, [[TMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP6]], poison +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP8]]) +; CHECK-NEXT: [[OP_EXTRA:%.*]] = icmp slt i32 [[TMP9]], undef +; CHECK-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP9]], i32 undef ; CHECK-NEXT: [[OP_EXTRA2:%.*]] = icmp slt i32 [[OP_EXTRA1]], undef ; CHECK-NEXT: [[OP_EXTRA3:%.*]] = select i1 [[OP_EXTRA2]], i32 [[OP_EXTRA1]], i32 undef ; CHECK-NEXT: [[OP_EXTRA4:%.*]] = icmp slt i32 [[OP_EXTRA3]], undef diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll @@ -15,9 +15,9 @@ ; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 4 ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 5 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], undef -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> undef, <4 x i32> [[SHUFFLE1]] -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], poison +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> poison, <4 x i32> [[SHUFFLE1]] +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> poison, <4 x i32> zeroinitializer, <4 x i32> [[TMP4]] ; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 6 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP27]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 8 @@ -66,7 +66,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[ENTRY:%.*]] ], [ [[SHRINK_SHUFFLE:%.*]], [[IF_END:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ poison, [[ENTRY:%.*]] ], [ [[SHRINK_SHUFFLE:%.*]], [[IF_END:%.*]] ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> poison, <8 x i32> ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll @@ -279,9 +279,9 @@ ; CHECK-NEXT: [[PTR5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 5 ; CHECK-NEXT: [[PTR6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 6 ; CHECK-NEXT: [[PTR7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 7 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP2]], i32 1 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP2]], i32 1 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[PTR0]] to <8 x i32>* ; CHECK-NEXT: store <8 x i32> [[SHUFFLE]], <8 x i32>* [[TMP5]], align 16 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll @@ -13,27 +13,27 @@ ; CHECK-NEXT: bb279: ; CHECK-NEXT: br label [[BB283:%.*]] ; CHECK: bb283: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ undef, [[EXIT]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ poison, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ poison, [[BB279]] ], [ poison, [[EXIT]] ] ; CHECK-NEXT: br label [[BB284:%.*]] ; CHECK: bb284: ; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double> -; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef -; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef +; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], poison +; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], poison ; CHECK-NEXT: br label [[BB21_I:%.*]] ; CHECK: bb21.i: ; CHECK-NEXT: br i1 undef, label [[BB22_I:%.*]], label [[EXIT]] ; CHECK: bb22.i: -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> poison, [[TMP4]] ; CHECK-NEXT: br label [[BB32_I:%.*]] ; CHECK: bb32.i: ; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ] ; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]] ; CHECK: exit: ; CHECK-NEXT: [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double> -; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], undef +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> poison, [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], poison ; CHECK-NEXT: [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float> ; CHECK-NEXT: br label [[BB283]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll @@ -13,27 +13,27 @@ ; CHECK-NEXT: bb279: ; CHECK-NEXT: br label [[BB283:%.*]] ; CHECK: bb283: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ undef, [[EXIT]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ poison, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ poison, [[BB279]] ], [ poison, [[EXIT]] ] ; CHECK-NEXT: br label [[BB284:%.*]] ; CHECK: bb284: ; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double> -; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef -; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef +; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], poison +; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], poison ; CHECK-NEXT: br label [[BB21_I:%.*]] ; CHECK: bb21.i: ; CHECK-NEXT: br i1 undef, label [[BB22_I:%.*]], label [[EXIT]] ; CHECK: bb22.i: -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> poison, [[TMP4]] ; CHECK-NEXT: br label [[BB32_I:%.*]] ; CHECK: bb32.i: ; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ] ; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]] ; CHECK: exit: ; CHECK-NEXT: [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double> -; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], undef +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> poison, [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], poison ; CHECK-NEXT: [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float> ; CHECK-NEXT: br label [[BB283]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll b/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll --- a/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll +++ b/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll @@ -166,48 +166,43 @@ ; MAX256-NEXT: [[TMP30:%.*]] = fadd <8 x float> zeroinitializer, [[TMP29]] ; MAX256-NEXT: [[TMP31:%.*]] = fmul <8 x float> [[TMP26]], [[TMP12]] ; MAX256-NEXT: [[TMP32:%.*]] = fadd <8 x float> zeroinitializer, [[TMP31]] -; MAX256-NEXT: [[TMP33:%.*]] = extractelement <8 x float> [[TMP14]], i32 0 -; MAX256-NEXT: [[TMP34:%.*]] = insertelement <8 x float> poison, float [[TMP33]], i32 0 -; MAX256-NEXT: [[TMP35:%.*]] = extractelement <8 x float> [[TMP14]], i32 1 -; MAX256-NEXT: [[TMP36:%.*]] = insertelement <8 x float> [[TMP34]], float [[TMP35]], i32 1 -; MAX256-NEXT: [[TMP37:%.*]] = insertelement <8 x float> [[TMP36]], float [[FVAL]], i32 2 -; MAX256-NEXT: [[TMP38:%.*]] = insertelement <8 x float> [[TMP37]], float [[FVAL]], i32 3 -; MAX256-NEXT: [[TMP39:%.*]] = extractelement <8 x float> [[TMP14]], i32 4 -; MAX256-NEXT: [[TMP40:%.*]] = insertelement <8 x float> [[TMP38]], float [[TMP39]], i32 4 -; MAX256-NEXT: [[TMP41:%.*]] = extractelement <8 x float> [[TMP14]], i32 5 -; MAX256-NEXT: [[TMP42:%.*]] = insertelement <8 x float> [[TMP40]], float [[TMP41]], i32 5 -; MAX256-NEXT: [[TMP43:%.*]] = insertelement <8 x float> [[TMP42]], float [[FVAL]], i32 6 -; MAX256-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[FVAL]], i32 7 -; MAX256-NEXT: [[TMP45:%.*]] = extractelement <8 x float> [[TMP28]], i32 2 -; MAX256-NEXT: [[TMP46:%.*]] = insertelement <8 x float> [[TMP6]], float [[TMP45]], i32 2 -; MAX256-NEXT: [[TMP47:%.*]] = extractelement <8 x float> [[TMP28]], i32 3 -; MAX256-NEXT: [[TMP48:%.*]] = insertelement <8 x float> [[TMP46]], float [[TMP47]], i32 3 -; MAX256-NEXT: [[TMP49:%.*]] = insertelement <8 x float> [[TMP48]], float [[FVAL]], i32 4 -; MAX256-NEXT: [[TMP50:%.*]] = insertelement <8 x float> [[TMP49]], float [[FVAL]], i32 5 -; MAX256-NEXT: [[TMP51:%.*]] = extractelement <8 x float> [[TMP28]], i32 6 -; MAX256-NEXT: [[TMP52:%.*]] = insertelement <8 x float> [[TMP50]], float [[TMP51]], i32 6 -; MAX256-NEXT: [[TMP53:%.*]] = extractelement <8 x float> [[TMP28]], i32 7 -; MAX256-NEXT: [[TMP54:%.*]] = insertelement <8 x float> [[TMP52]], float [[TMP53]], i32 7 -; MAX256-NEXT: [[TMP55:%.*]] = extractelement <8 x float> [[TMP30]], i32 2 -; MAX256-NEXT: [[TMP56:%.*]] = insertelement <8 x float> [[TMP6]], float [[TMP55]], i32 2 -; MAX256-NEXT: [[TMP57:%.*]] = extractelement <8 x float> [[TMP30]], i32 3 -; MAX256-NEXT: [[TMP58:%.*]] = insertelement <8 x float> [[TMP56]], float [[TMP57]], i32 3 -; MAX256-NEXT: [[TMP59:%.*]] = insertelement <8 x float> [[TMP58]], float [[FVAL]], i32 4 -; MAX256-NEXT: [[TMP60:%.*]] = insertelement <8 x float> [[TMP59]], float [[FVAL]], i32 5 -; MAX256-NEXT: [[TMP61:%.*]] = extractelement <8 x float> [[TMP30]], i32 6 -; MAX256-NEXT: [[TMP62:%.*]] = insertelement <8 x float> [[TMP60]], float [[TMP61]], i32 6 -; MAX256-NEXT: [[TMP63:%.*]] = extractelement <8 x float> [[TMP30]], i32 7 -; MAX256-NEXT: [[TMP64:%.*]] = insertelement <8 x float> [[TMP62]], float [[TMP63]], i32 7 -; MAX256-NEXT: [[TMP65:%.*]] = extractelement <8 x float> [[TMP32]], i32 2 -; MAX256-NEXT: [[TMP66:%.*]] = insertelement <8 x float> [[TMP6]], float [[TMP65]], i32 2 -; MAX256-NEXT: [[TMP67:%.*]] = extractelement <8 x float> [[TMP32]], i32 3 -; MAX256-NEXT: [[TMP68:%.*]] = insertelement <8 x float> [[TMP66]], float [[TMP67]], i32 3 -; MAX256-NEXT: [[TMP69:%.*]] = insertelement <8 x float> [[TMP68]], float [[FVAL]], i32 4 -; MAX256-NEXT: [[TMP70:%.*]] = insertelement <8 x float> [[TMP69]], float [[FVAL]], i32 5 -; MAX256-NEXT: [[TMP71:%.*]] = extractelement <8 x float> [[TMP32]], i32 6 -; MAX256-NEXT: [[TMP72:%.*]] = insertelement <8 x float> [[TMP70]], float [[TMP71]], i32 6 -; MAX256-NEXT: [[TMP73:%.*]] = extractelement <8 x float> [[TMP32]], i32 7 -; MAX256-NEXT: [[TMP74:%.*]] = insertelement <8 x float> [[TMP72]], float [[TMP73]], i32 7 +; MAX256-NEXT: [[TMP33:%.*]] = insertelement <8 x float> poison, float [[FVAL]], i32 2 +; MAX256-NEXT: [[TMP34:%.*]] = extractelement <8 x float> [[TMP14]], i32 0 +; MAX256-NEXT: [[TMP35:%.*]] = insertelement <8 x float> [[TMP33]], float [[TMP34]], i32 0 +; MAX256-NEXT: [[TMP36:%.*]] = extractelement <8 x float> [[TMP14]], i32 1 +; MAX256-NEXT: [[TMP37:%.*]] = insertelement <8 x float> [[TMP35]], float [[TMP36]], i32 1 +; MAX256-NEXT: [[TMP38:%.*]] = extractelement <8 x float> [[TMP14]], i32 4 +; MAX256-NEXT: [[TMP39:%.*]] = insertelement <8 x float> [[TMP37]], float [[TMP38]], i32 3 +; MAX256-NEXT: [[TMP40:%.*]] = extractelement <8 x float> [[TMP14]], i32 5 +; MAX256-NEXT: [[TMP41:%.*]] = insertelement <8 x float> [[TMP39]], float [[TMP40]], i32 4 +; MAX256-NEXT: [[SHUFFLE3:%.*]] = shufflevector <8 x float> [[TMP41]], <8 x float> poison, <8 x i32> +; MAX256-NEXT: [[TMP42:%.*]] = extractelement <8 x float> [[TMP28]], i32 2 +; MAX256-NEXT: [[TMP43:%.*]] = insertelement <8 x float> [[TMP5]], float [[TMP42]], i32 1 +; MAX256-NEXT: [[TMP44:%.*]] = extractelement <8 x float> [[TMP28]], i32 3 +; MAX256-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP44]], i32 2 +; MAX256-NEXT: [[TMP46:%.*]] = extractelement <8 x float> [[TMP28]], i32 6 +; MAX256-NEXT: [[TMP47:%.*]] = insertelement <8 x float> [[TMP45]], float [[TMP46]], i32 3 +; MAX256-NEXT: [[TMP48:%.*]] = extractelement <8 x float> [[TMP28]], i32 7 +; MAX256-NEXT: [[TMP49:%.*]] = insertelement <8 x float> [[TMP47]], float [[TMP48]], i32 4 +; MAX256-NEXT: [[SHUFFLE6:%.*]] = shufflevector <8 x float> [[TMP49]], <8 x float> poison, <8 x i32> +; MAX256-NEXT: [[TMP50:%.*]] = extractelement <8 x float> [[TMP30]], i32 2 +; MAX256-NEXT: [[TMP51:%.*]] = insertelement <8 x float> [[TMP5]], float [[TMP50]], i32 1 +; MAX256-NEXT: [[TMP52:%.*]] = extractelement <8 x float> [[TMP30]], i32 3 +; MAX256-NEXT: [[TMP53:%.*]] = insertelement <8 x float> [[TMP51]], float [[TMP52]], i32 2 +; MAX256-NEXT: [[TMP54:%.*]] = extractelement <8 x float> [[TMP30]], i32 6 +; MAX256-NEXT: [[TMP55:%.*]] = insertelement <8 x float> [[TMP53]], float [[TMP54]], i32 3 +; MAX256-NEXT: [[TMP56:%.*]] = extractelement <8 x float> [[TMP30]], i32 7 +; MAX256-NEXT: [[TMP57:%.*]] = insertelement <8 x float> [[TMP55]], float [[TMP56]], i32 4 +; MAX256-NEXT: [[SHUFFLE9:%.*]] = shufflevector <8 x float> [[TMP57]], <8 x float> poison, <8 x i32> +; MAX256-NEXT: [[TMP58:%.*]] = extractelement <8 x float> [[TMP32]], i32 2 +; MAX256-NEXT: [[TMP59:%.*]] = insertelement <8 x float> [[TMP5]], float [[TMP58]], i32 1 +; MAX256-NEXT: [[TMP60:%.*]] = extractelement <8 x float> [[TMP32]], i32 3 +; MAX256-NEXT: [[TMP61:%.*]] = insertelement <8 x float> [[TMP59]], float [[TMP60]], i32 2 +; MAX256-NEXT: [[TMP62:%.*]] = extractelement <8 x float> [[TMP32]], i32 6 +; MAX256-NEXT: [[TMP63:%.*]] = insertelement <8 x float> [[TMP61]], float [[TMP62]], i32 3 +; MAX256-NEXT: [[TMP64:%.*]] = extractelement <8 x float> [[TMP32]], i32 7 +; MAX256-NEXT: [[TMP65:%.*]] = insertelement <8 x float> [[TMP63]], float [[TMP64]], i32 4 +; MAX256-NEXT: [[SHUFFLE12:%.*]] = shufflevector <8 x float> [[TMP65]], <8 x float> poison, <8 x i32> ; MAX256-NEXT: switch i32 undef, label [[BB5:%.*]] [ ; MAX256-NEXT: i32 0, label [[BB2:%.*]] ; MAX256-NEXT: i32 1, label [[BB3:%.*]] @@ -216,94 +211,79 @@ ; MAX256: bb3: ; MAX256-NEXT: br label [[BB2]] ; MAX256: bb4: -; MAX256-NEXT: [[TMP75:%.*]] = insertelement <8 x float> [[TMP34]], float [[FVAL]], i32 1 -; MAX256-NEXT: [[TMP76:%.*]] = insertelement <8 x float> [[TMP75]], float [[FVAL]], i32 2 -; MAX256-NEXT: [[TMP77:%.*]] = extractelement <8 x float> [[TMP14]], i32 3 +; MAX256-NEXT: [[TMP66:%.*]] = insertelement <8 x float> poison, float [[TMP34]], i32 0 +; MAX256-NEXT: [[TMP67:%.*]] = insertelement <8 x float> [[TMP66]], float [[FVAL]], i32 1 +; MAX256-NEXT: [[TMP68:%.*]] = extractelement <8 x float> [[TMP14]], i32 3 +; MAX256-NEXT: [[TMP69:%.*]] = insertelement <8 x float> [[TMP67]], float [[TMP68]], i32 2 +; MAX256-NEXT: [[TMP70:%.*]] = insertelement <8 x float> [[TMP69]], float [[TMP38]], i32 3 +; MAX256-NEXT: [[TMP71:%.*]] = extractelement <8 x float> [[TMP14]], i32 7 +; MAX256-NEXT: [[TMP72:%.*]] = insertelement <8 x float> [[TMP70]], float [[TMP71]], i32 4 +; MAX256-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x float> [[TMP72]], <8 x float> poison, <8 x i32> +; MAX256-NEXT: [[TMP73:%.*]] = extractelement <8 x float> [[TMP28]], i32 0 +; MAX256-NEXT: [[TMP74:%.*]] = insertelement <8 x float> poison, float [[TMP73]], i32 0 +; MAX256-NEXT: [[TMP75:%.*]] = insertelement <8 x float> [[TMP74]], float [[FVAL]], i32 1 +; MAX256-NEXT: [[TMP76:%.*]] = insertelement <8 x float> [[TMP75]], float [[TMP44]], i32 2 +; MAX256-NEXT: [[TMP77:%.*]] = extractelement <8 x float> [[TMP28]], i32 4 ; MAX256-NEXT: [[TMP78:%.*]] = insertelement <8 x float> [[TMP76]], float [[TMP77]], i32 3 -; MAX256-NEXT: [[TMP79:%.*]] = insertelement <8 x float> [[TMP78]], float [[TMP39]], i32 4 -; MAX256-NEXT: [[TMP80:%.*]] = insertelement <8 x float> [[TMP79]], float [[FVAL]], i32 5 -; MAX256-NEXT: [[TMP81:%.*]] = insertelement <8 x float> [[TMP80]], float [[FVAL]], i32 6 -; MAX256-NEXT: [[TMP82:%.*]] = extractelement <8 x float> [[TMP14]], i32 7 -; MAX256-NEXT: [[TMP83:%.*]] = insertelement <8 x float> [[TMP81]], float [[TMP82]], i32 7 -; MAX256-NEXT: [[TMP84:%.*]] = extractelement <8 x float> [[TMP28]], i32 0 -; MAX256-NEXT: [[TMP85:%.*]] = insertelement <8 x float> poison, float [[TMP84]], i32 0 -; MAX256-NEXT: [[TMP86:%.*]] = insertelement <8 x float> [[TMP85]], float [[FVAL]], i32 1 -; MAX256-NEXT: [[TMP87:%.*]] = insertelement <8 x float> [[TMP86]], float [[FVAL]], i32 2 -; MAX256-NEXT: [[TMP88:%.*]] = insertelement <8 x float> [[TMP87]], float [[TMP47]], i32 3 -; MAX256-NEXT: [[TMP89:%.*]] = extractelement <8 x float> [[TMP28]], i32 4 -; MAX256-NEXT: [[TMP90:%.*]] = insertelement <8 x float> [[TMP88]], float [[TMP89]], i32 4 -; MAX256-NEXT: [[TMP91:%.*]] = insertelement <8 x float> [[TMP90]], float [[FVAL]], i32 5 -; MAX256-NEXT: [[TMP92:%.*]] = insertelement <8 x float> [[TMP91]], float [[FVAL]], i32 6 -; MAX256-NEXT: [[TMP93:%.*]] = insertelement <8 x float> [[TMP92]], float [[TMP53]], i32 7 -; MAX256-NEXT: [[TMP94:%.*]] = extractelement <8 x float> [[TMP30]], i32 0 -; MAX256-NEXT: [[TMP95:%.*]] = insertelement <8 x float> poison, float [[TMP94]], i32 0 -; MAX256-NEXT: [[TMP96:%.*]] = insertelement <8 x float> [[TMP95]], float [[FVAL]], i32 1 -; MAX256-NEXT: [[TMP97:%.*]] = insertelement <8 x float> [[TMP96]], float [[FVAL]], i32 2 -; MAX256-NEXT: [[TMP98:%.*]] = insertelement <8 x float> [[TMP97]], float [[TMP57]], i32 3 -; MAX256-NEXT: [[TMP99:%.*]] = extractelement <8 x float> [[TMP30]], i32 4 -; MAX256-NEXT: [[TMP100:%.*]] = insertelement <8 x float> [[TMP98]], float [[TMP99]], i32 4 -; MAX256-NEXT: [[TMP101:%.*]] = insertelement <8 x float> [[TMP100]], float [[FVAL]], i32 5 -; MAX256-NEXT: [[TMP102:%.*]] = insertelement <8 x float> [[TMP101]], float [[FVAL]], i32 6 -; MAX256-NEXT: [[TMP103:%.*]] = insertelement <8 x float> [[TMP102]], float [[TMP63]], i32 7 -; MAX256-NEXT: [[TMP104:%.*]] = extractelement <8 x float> [[TMP32]], i32 0 -; MAX256-NEXT: [[TMP105:%.*]] = insertelement <8 x float> poison, float [[TMP104]], i32 0 -; MAX256-NEXT: [[TMP106:%.*]] = insertelement <8 x float> [[TMP105]], float [[FVAL]], i32 1 -; MAX256-NEXT: [[TMP107:%.*]] = insertelement <8 x float> [[TMP106]], float [[FVAL]], i32 2 -; MAX256-NEXT: [[TMP108:%.*]] = insertelement <8 x float> [[TMP107]], float [[TMP67]], i32 3 -; MAX256-NEXT: [[TMP109:%.*]] = extractelement <8 x float> [[TMP32]], i32 4 -; MAX256-NEXT: [[TMP110:%.*]] = insertelement <8 x float> [[TMP108]], float [[TMP109]], i32 4 -; MAX256-NEXT: [[TMP111:%.*]] = insertelement <8 x float> [[TMP110]], float [[FVAL]], i32 5 -; MAX256-NEXT: [[TMP112:%.*]] = insertelement <8 x float> [[TMP111]], float [[FVAL]], i32 6 -; MAX256-NEXT: [[TMP113:%.*]] = insertelement <8 x float> [[TMP112]], float [[TMP73]], i32 7 +; MAX256-NEXT: [[TMP79:%.*]] = insertelement <8 x float> [[TMP78]], float [[TMP48]], i32 4 +; MAX256-NEXT: [[SHUFFLE4:%.*]] = shufflevector <8 x float> [[TMP79]], <8 x float> poison, <8 x i32> +; MAX256-NEXT: [[TMP80:%.*]] = extractelement <8 x float> [[TMP30]], i32 0 +; MAX256-NEXT: [[TMP81:%.*]] = insertelement <8 x float> poison, float [[TMP80]], i32 0 +; MAX256-NEXT: [[TMP82:%.*]] = insertelement <8 x float> [[TMP81]], float [[FVAL]], i32 1 +; MAX256-NEXT: [[TMP83:%.*]] = insertelement <8 x float> [[TMP82]], float [[TMP52]], i32 2 +; MAX256-NEXT: [[TMP84:%.*]] = extractelement <8 x float> [[TMP30]], i32 4 +; MAX256-NEXT: [[TMP85:%.*]] = insertelement <8 x float> [[TMP83]], float [[TMP84]], i32 3 +; MAX256-NEXT: [[TMP86:%.*]] = insertelement <8 x float> [[TMP85]], float [[TMP56]], i32 4 +; MAX256-NEXT: [[SHUFFLE7:%.*]] = shufflevector <8 x float> [[TMP86]], <8 x float> poison, <8 x i32> +; MAX256-NEXT: [[TMP87:%.*]] = extractelement <8 x float> [[TMP32]], i32 0 +; MAX256-NEXT: [[TMP88:%.*]] = insertelement <8 x float> poison, float [[TMP87]], i32 0 +; MAX256-NEXT: [[TMP89:%.*]] = insertelement <8 x float> [[TMP88]], float [[FVAL]], i32 1 +; MAX256-NEXT: [[TMP90:%.*]] = insertelement <8 x float> [[TMP89]], float [[TMP60]], i32 2 +; MAX256-NEXT: [[TMP91:%.*]] = extractelement <8 x float> [[TMP32]], i32 4 +; MAX256-NEXT: [[TMP92:%.*]] = insertelement <8 x float> [[TMP90]], float [[TMP91]], i32 3 +; MAX256-NEXT: [[TMP93:%.*]] = insertelement <8 x float> [[TMP92]], float [[TMP64]], i32 4 +; MAX256-NEXT: [[SHUFFLE10:%.*]] = shufflevector <8 x float> [[TMP93]], <8 x float> poison, <8 x i32> ; MAX256-NEXT: br label [[BB2]] ; MAX256: bb5: -; MAX256-NEXT: [[TMP114:%.*]] = insertelement <8 x float> [[TMP5]], float [[TMP35]], i32 1 -; MAX256-NEXT: [[TMP115:%.*]] = insertelement <8 x float> [[TMP114]], float [[FVAL]], i32 2 -; MAX256-NEXT: [[TMP116:%.*]] = extractelement <8 x float> [[TMP14]], i32 3 -; MAX256-NEXT: [[TMP117:%.*]] = insertelement <8 x float> [[TMP115]], float [[TMP116]], i32 3 -; MAX256-NEXT: [[TMP118:%.*]] = insertelement <8 x float> [[TMP117]], float [[FVAL]], i32 4 -; MAX256-NEXT: [[TMP119:%.*]] = insertelement <8 x float> [[TMP118]], float [[TMP41]], i32 5 -; MAX256-NEXT: [[TMP120:%.*]] = insertelement <8 x float> [[TMP119]], float [[FVAL]], i32 6 -; MAX256-NEXT: [[TMP121:%.*]] = extractelement <8 x float> [[TMP14]], i32 7 -; MAX256-NEXT: [[TMP122:%.*]] = insertelement <8 x float> [[TMP120]], float [[TMP121]], i32 7 -; MAX256-NEXT: [[TMP123:%.*]] = extractelement <8 x float> [[TMP28]], i32 0 -; MAX256-NEXT: [[TMP124:%.*]] = insertelement <8 x float> poison, float [[TMP123]], i32 0 -; MAX256-NEXT: [[TMP125:%.*]] = insertelement <8 x float> [[TMP124]], float [[FVAL]], i32 1 -; MAX256-NEXT: [[TMP126:%.*]] = insertelement <8 x float> [[TMP125]], float [[TMP45]], i32 2 -; MAX256-NEXT: [[TMP127:%.*]] = insertelement <8 x float> [[TMP126]], float [[FVAL]], i32 3 -; MAX256-NEXT: [[TMP128:%.*]] = extractelement <8 x float> [[TMP28]], i32 4 -; MAX256-NEXT: [[TMP129:%.*]] = insertelement <8 x float> [[TMP127]], float [[TMP128]], i32 4 -; MAX256-NEXT: [[TMP130:%.*]] = insertelement <8 x float> [[TMP129]], float [[FVAL]], i32 5 -; MAX256-NEXT: [[TMP131:%.*]] = insertelement <8 x float> [[TMP130]], float [[TMP51]], i32 6 -; MAX256-NEXT: [[TMP132:%.*]] = insertelement <8 x float> [[TMP131]], float [[FVAL]], i32 7 -; MAX256-NEXT: [[TMP133:%.*]] = extractelement <8 x float> [[TMP30]], i32 0 -; MAX256-NEXT: [[TMP134:%.*]] = insertelement <8 x float> poison, float [[TMP133]], i32 0 -; MAX256-NEXT: [[TMP135:%.*]] = insertelement <8 x float> [[TMP134]], float [[FVAL]], i32 1 -; MAX256-NEXT: [[TMP136:%.*]] = insertelement <8 x float> [[TMP135]], float [[TMP55]], i32 2 -; MAX256-NEXT: [[TMP137:%.*]] = insertelement <8 x float> [[TMP136]], float [[FVAL]], i32 3 -; MAX256-NEXT: [[TMP138:%.*]] = extractelement <8 x float> [[TMP30]], i32 4 -; MAX256-NEXT: [[TMP139:%.*]] = insertelement <8 x float> [[TMP137]], float [[TMP138]], i32 4 -; MAX256-NEXT: [[TMP140:%.*]] = insertelement <8 x float> [[TMP139]], float [[FVAL]], i32 5 -; MAX256-NEXT: [[TMP141:%.*]] = insertelement <8 x float> [[TMP140]], float [[TMP61]], i32 6 -; MAX256-NEXT: [[TMP142:%.*]] = insertelement <8 x float> [[TMP141]], float [[FVAL]], i32 7 -; MAX256-NEXT: [[TMP143:%.*]] = extractelement <8 x float> [[TMP32]], i32 0 -; MAX256-NEXT: [[TMP144:%.*]] = insertelement <8 x float> poison, float [[TMP143]], i32 0 -; MAX256-NEXT: [[TMP145:%.*]] = insertelement <8 x float> [[TMP144]], float [[FVAL]], i32 1 -; MAX256-NEXT: [[TMP146:%.*]] = insertelement <8 x float> [[TMP145]], float [[TMP65]], i32 2 -; MAX256-NEXT: [[TMP147:%.*]] = insertelement <8 x float> [[TMP146]], float [[FVAL]], i32 3 -; MAX256-NEXT: [[TMP148:%.*]] = extractelement <8 x float> [[TMP32]], i32 4 -; MAX256-NEXT: [[TMP149:%.*]] = insertelement <8 x float> [[TMP147]], float [[TMP148]], i32 4 -; MAX256-NEXT: [[TMP150:%.*]] = insertelement <8 x float> [[TMP149]], float [[FVAL]], i32 5 -; MAX256-NEXT: [[TMP151:%.*]] = insertelement <8 x float> [[TMP150]], float [[TMP71]], i32 6 -; MAX256-NEXT: [[TMP152:%.*]] = insertelement <8 x float> [[TMP151]], float [[FVAL]], i32 7 +; MAX256-NEXT: [[TMP94:%.*]] = insertelement <8 x float> [[TMP5]], float [[TMP36]], i32 1 +; MAX256-NEXT: [[TMP95:%.*]] = extractelement <8 x float> [[TMP14]], i32 3 +; MAX256-NEXT: [[TMP96:%.*]] = insertelement <8 x float> [[TMP94]], float [[TMP95]], i32 2 +; MAX256-NEXT: [[TMP97:%.*]] = insertelement <8 x float> [[TMP96]], float [[TMP40]], i32 3 +; MAX256-NEXT: [[TMP98:%.*]] = extractelement <8 x float> [[TMP14]], i32 7 +; MAX256-NEXT: [[TMP99:%.*]] = insertelement <8 x float> [[TMP97]], float [[TMP98]], i32 4 +; MAX256-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x float> [[TMP99]], <8 x float> poison, <8 x i32> +; MAX256-NEXT: [[TMP100:%.*]] = extractelement <8 x float> [[TMP28]], i32 0 +; MAX256-NEXT: [[TMP101:%.*]] = insertelement <8 x float> poison, float [[TMP100]], i32 0 +; MAX256-NEXT: [[TMP102:%.*]] = insertelement <8 x float> [[TMP101]], float [[FVAL]], i32 1 +; MAX256-NEXT: [[TMP103:%.*]] = insertelement <8 x float> [[TMP102]], float [[TMP42]], i32 2 +; MAX256-NEXT: [[TMP104:%.*]] = extractelement <8 x float> [[TMP28]], i32 4 +; MAX256-NEXT: [[TMP105:%.*]] = insertelement <8 x float> [[TMP103]], float [[TMP104]], i32 3 +; MAX256-NEXT: [[TMP106:%.*]] = insertelement <8 x float> [[TMP105]], float [[TMP46]], i32 4 +; MAX256-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x float> [[TMP106]], <8 x float> poison, <8 x i32> +; MAX256-NEXT: [[TMP107:%.*]] = extractelement <8 x float> [[TMP30]], i32 0 +; MAX256-NEXT: [[TMP108:%.*]] = insertelement <8 x float> poison, float [[TMP107]], i32 0 +; MAX256-NEXT: [[TMP109:%.*]] = insertelement <8 x float> [[TMP108]], float [[FVAL]], i32 1 +; MAX256-NEXT: [[TMP110:%.*]] = insertelement <8 x float> [[TMP109]], float [[TMP50]], i32 2 +; MAX256-NEXT: [[TMP111:%.*]] = extractelement <8 x float> [[TMP30]], i32 4 +; MAX256-NEXT: [[TMP112:%.*]] = insertelement <8 x float> [[TMP110]], float [[TMP111]], i32 3 +; MAX256-NEXT: [[TMP113:%.*]] = insertelement <8 x float> [[TMP112]], float [[TMP54]], i32 4 +; MAX256-NEXT: [[SHUFFLE8:%.*]] = shufflevector <8 x float> [[TMP113]], <8 x float> poison, <8 x i32> +; MAX256-NEXT: [[TMP114:%.*]] = extractelement <8 x float> [[TMP32]], i32 0 +; MAX256-NEXT: [[TMP115:%.*]] = insertelement <8 x float> poison, float [[TMP114]], i32 0 +; MAX256-NEXT: [[TMP116:%.*]] = insertelement <8 x float> [[TMP115]], float [[FVAL]], i32 1 +; MAX256-NEXT: [[TMP117:%.*]] = insertelement <8 x float> [[TMP116]], float [[TMP58]], i32 2 +; MAX256-NEXT: [[TMP118:%.*]] = extractelement <8 x float> [[TMP32]], i32 4 +; MAX256-NEXT: [[TMP119:%.*]] = insertelement <8 x float> [[TMP117]], float [[TMP118]], i32 3 +; MAX256-NEXT: [[TMP120:%.*]] = insertelement <8 x float> [[TMP119]], float [[TMP62]], i32 4 +; MAX256-NEXT: [[SHUFFLE11:%.*]] = shufflevector <8 x float> [[TMP120]], <8 x float> poison, <8 x i32> ; MAX256-NEXT: br label [[BB2]] ; MAX256: bb2: -; MAX256-NEXT: [[TMP153:%.*]] = phi <8 x float> [ [[TMP14]], [[BB3]] ], [ [[TMP83]], [[BB4]] ], [ [[TMP122]], [[BB5]] ], [ [[TMP44]], [[BB1]] ] -; MAX256-NEXT: [[TMP154:%.*]] = phi <8 x float> [ [[TMP28]], [[BB3]] ], [ [[TMP93]], [[BB4]] ], [ [[TMP132]], [[BB5]] ], [ [[TMP54]], [[BB1]] ] -; MAX256-NEXT: [[TMP155:%.*]] = phi <8 x float> [ [[TMP30]], [[BB3]] ], [ [[TMP103]], [[BB4]] ], [ [[TMP142]], [[BB5]] ], [ [[TMP64]], [[BB1]] ] -; MAX256-NEXT: [[TMP156:%.*]] = phi <8 x float> [ [[TMP32]], [[BB3]] ], [ [[TMP113]], [[BB4]] ], [ [[TMP152]], [[BB5]] ], [ [[TMP74]], [[BB1]] ] -; MAX256-NEXT: [[TMP157:%.*]] = extractelement <8 x float> [[TMP156]], i32 6 -; MAX256-NEXT: store float [[TMP157]], float* undef, align 4 +; MAX256-NEXT: [[TMP121:%.*]] = phi <8 x float> [ [[TMP14]], [[BB3]] ], [ [[SHUFFLE1]], [[BB4]] ], [ [[SHUFFLE2]], [[BB5]] ], [ [[SHUFFLE3]], [[BB1]] ] +; MAX256-NEXT: [[TMP122:%.*]] = phi <8 x float> [ [[TMP28]], [[BB3]] ], [ [[SHUFFLE4]], [[BB4]] ], [ [[SHUFFLE5]], [[BB5]] ], [ [[SHUFFLE6]], [[BB1]] ] +; MAX256-NEXT: [[TMP123:%.*]] = phi <8 x float> [ [[TMP30]], [[BB3]] ], [ [[SHUFFLE7]], [[BB4]] ], [ [[SHUFFLE8]], [[BB5]] ], [ [[SHUFFLE9]], [[BB1]] ] +; MAX256-NEXT: [[TMP124:%.*]] = phi <8 x float> [ [[TMP32]], [[BB3]] ], [ [[SHUFFLE10]], [[BB4]] ], [ [[SHUFFLE11]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ] +; MAX256-NEXT: [[TMP125:%.*]] = extractelement <8 x float> [[TMP124]], i32 6 +; MAX256-NEXT: store float [[TMP125]], float* undef, align 4 ; MAX256-NEXT: ret void ; ; MAX1024-LABEL: @phi_float32( @@ -350,54 +330,40 @@ ; MAX1024-NEXT: [[TMP36:%.*]] = insertelement <32 x float> [[TMP35]], float [[FVAL]], i32 31 ; MAX1024-NEXT: [[TMP37:%.*]] = fmul <32 x float> [[SHUFFLE]], [[TMP36]] ; MAX1024-NEXT: [[TMP38:%.*]] = fadd <32 x float> zeroinitializer, [[TMP37]] -; MAX1024-NEXT: [[TMP39:%.*]] = extractelement <32 x float> [[TMP38]], i32 0 -; MAX1024-NEXT: [[TMP40:%.*]] = insertelement <32 x float> poison, float [[TMP39]], i32 0 -; MAX1024-NEXT: [[TMP41:%.*]] = extractelement <32 x float> [[TMP38]], i32 1 -; MAX1024-NEXT: [[TMP42:%.*]] = insertelement <32 x float> [[TMP40]], float [[TMP41]], i32 1 -; MAX1024-NEXT: [[TMP43:%.*]] = insertelement <32 x float> [[TMP42]], float [[FVAL]], i32 2 -; MAX1024-NEXT: [[TMP44:%.*]] = insertelement <32 x float> [[TMP43]], float [[FVAL]], i32 3 -; MAX1024-NEXT: [[TMP45:%.*]] = extractelement <32 x float> [[TMP38]], i32 4 -; MAX1024-NEXT: [[TMP46:%.*]] = insertelement <32 x float> [[TMP44]], float [[TMP45]], i32 4 -; MAX1024-NEXT: [[TMP47:%.*]] = extractelement <32 x float> [[TMP38]], i32 5 -; MAX1024-NEXT: [[TMP48:%.*]] = insertelement <32 x float> [[TMP46]], float [[TMP47]], i32 5 -; MAX1024-NEXT: [[TMP49:%.*]] = insertelement <32 x float> [[TMP48]], float [[FVAL]], i32 6 -; MAX1024-NEXT: [[TMP50:%.*]] = insertelement <32 x float> [[TMP49]], float [[FVAL]], i32 7 -; MAX1024-NEXT: [[TMP51:%.*]] = insertelement <32 x float> [[TMP50]], float [[FVAL]], i32 8 -; MAX1024-NEXT: [[TMP52:%.*]] = insertelement <32 x float> [[TMP51]], float [[FVAL]], i32 9 -; MAX1024-NEXT: [[TMP53:%.*]] = extractelement <32 x float> [[TMP38]], i32 10 -; MAX1024-NEXT: [[TMP54:%.*]] = insertelement <32 x float> [[TMP52]], float [[TMP53]], i32 10 -; MAX1024-NEXT: [[TMP55:%.*]] = extractelement <32 x float> [[TMP38]], i32 11 -; MAX1024-NEXT: [[TMP56:%.*]] = insertelement <32 x float> [[TMP54]], float [[TMP55]], i32 11 -; MAX1024-NEXT: [[TMP57:%.*]] = insertelement <32 x float> [[TMP56]], float [[FVAL]], i32 12 -; MAX1024-NEXT: [[TMP58:%.*]] = insertelement <32 x float> [[TMP57]], float [[FVAL]], i32 13 -; MAX1024-NEXT: [[TMP59:%.*]] = extractelement <32 x float> [[TMP38]], i32 14 -; MAX1024-NEXT: [[TMP60:%.*]] = insertelement <32 x float> [[TMP58]], float [[TMP59]], i32 14 -; MAX1024-NEXT: [[TMP61:%.*]] = extractelement <32 x float> [[TMP38]], i32 15 -; MAX1024-NEXT: [[TMP62:%.*]] = insertelement <32 x float> [[TMP60]], float [[TMP61]], i32 15 -; MAX1024-NEXT: [[TMP63:%.*]] = insertelement <32 x float> [[TMP62]], float [[FVAL]], i32 16 -; MAX1024-NEXT: [[TMP64:%.*]] = insertelement <32 x float> [[TMP63]], float [[FVAL]], i32 17 -; MAX1024-NEXT: [[TMP65:%.*]] = extractelement <32 x float> [[TMP38]], i32 18 -; MAX1024-NEXT: [[TMP66:%.*]] = insertelement <32 x float> [[TMP64]], float [[TMP65]], i32 18 -; MAX1024-NEXT: [[TMP67:%.*]] = extractelement <32 x float> [[TMP38]], i32 19 -; MAX1024-NEXT: [[TMP68:%.*]] = insertelement <32 x float> [[TMP66]], float [[TMP67]], i32 19 -; MAX1024-NEXT: [[TMP69:%.*]] = insertelement <32 x float> [[TMP68]], float [[FVAL]], i32 20 -; MAX1024-NEXT: [[TMP70:%.*]] = insertelement <32 x float> [[TMP69]], float [[FVAL]], i32 21 -; MAX1024-NEXT: [[TMP71:%.*]] = extractelement <32 x float> [[TMP38]], i32 22 -; MAX1024-NEXT: [[TMP72:%.*]] = insertelement <32 x float> [[TMP70]], float [[TMP71]], i32 22 -; MAX1024-NEXT: [[TMP73:%.*]] = extractelement <32 x float> [[TMP38]], i32 23 -; MAX1024-NEXT: [[TMP74:%.*]] = insertelement <32 x float> [[TMP72]], float [[TMP73]], i32 23 -; MAX1024-NEXT: [[TMP75:%.*]] = insertelement <32 x float> [[TMP74]], float [[FVAL]], i32 24 -; MAX1024-NEXT: [[TMP76:%.*]] = insertelement <32 x float> [[TMP75]], float [[FVAL]], i32 25 -; MAX1024-NEXT: [[TMP77:%.*]] = extractelement <32 x float> [[TMP38]], i32 26 -; MAX1024-NEXT: [[TMP78:%.*]] = insertelement <32 x float> [[TMP76]], float [[TMP77]], i32 26 -; MAX1024-NEXT: [[TMP79:%.*]] = extractelement <32 x float> [[TMP38]], i32 27 -; MAX1024-NEXT: [[TMP80:%.*]] = insertelement <32 x float> [[TMP78]], float [[TMP79]], i32 27 -; MAX1024-NEXT: [[TMP81:%.*]] = insertelement <32 x float> [[TMP80]], float [[FVAL]], i32 28 -; MAX1024-NEXT: [[TMP82:%.*]] = insertelement <32 x float> [[TMP81]], float [[FVAL]], i32 29 -; MAX1024-NEXT: [[TMP83:%.*]] = extractelement <32 x float> [[TMP38]], i32 30 -; MAX1024-NEXT: [[TMP84:%.*]] = insertelement <32 x float> [[TMP82]], float [[TMP83]], i32 30 -; MAX1024-NEXT: [[TMP85:%.*]] = extractelement <32 x float> [[TMP38]], i32 31 -; MAX1024-NEXT: [[TMP86:%.*]] = insertelement <32 x float> [[TMP84]], float [[TMP85]], i32 31 +; MAX1024-NEXT: [[TMP39:%.*]] = insertelement <32 x float> poison, float [[FVAL]], i32 2 +; MAX1024-NEXT: [[TMP40:%.*]] = extractelement <32 x float> [[TMP38]], i32 0 +; MAX1024-NEXT: [[TMP41:%.*]] = insertelement <32 x float> [[TMP39]], float [[TMP40]], i32 0 +; MAX1024-NEXT: [[TMP42:%.*]] = extractelement <32 x float> [[TMP38]], i32 1 +; MAX1024-NEXT: [[TMP43:%.*]] = insertelement <32 x float> [[TMP41]], float [[TMP42]], i32 1 +; MAX1024-NEXT: [[TMP44:%.*]] = extractelement <32 x float> [[TMP38]], i32 4 +; MAX1024-NEXT: [[TMP45:%.*]] = insertelement <32 x float> [[TMP43]], float [[TMP44]], i32 3 +; MAX1024-NEXT: [[TMP46:%.*]] = extractelement <32 x float> [[TMP38]], i32 5 +; MAX1024-NEXT: [[TMP47:%.*]] = insertelement <32 x float> [[TMP45]], float [[TMP46]], i32 4 +; MAX1024-NEXT: [[TMP48:%.*]] = extractelement <32 x float> [[TMP38]], i32 10 +; MAX1024-NEXT: [[TMP49:%.*]] = insertelement <32 x float> [[TMP47]], float [[TMP48]], i32 5 +; MAX1024-NEXT: [[TMP50:%.*]] = extractelement <32 x float> [[TMP38]], i32 11 +; MAX1024-NEXT: [[TMP51:%.*]] = insertelement <32 x float> [[TMP49]], float [[TMP50]], i32 6 +; MAX1024-NEXT: [[TMP52:%.*]] = extractelement <32 x float> [[TMP38]], i32 14 +; MAX1024-NEXT: [[TMP53:%.*]] = insertelement <32 x float> [[TMP51]], float [[TMP52]], i32 7 +; MAX1024-NEXT: [[TMP54:%.*]] = extractelement <32 x float> [[TMP38]], i32 15 +; MAX1024-NEXT: [[TMP55:%.*]] = insertelement <32 x float> [[TMP53]], float [[TMP54]], i32 8 +; MAX1024-NEXT: [[TMP56:%.*]] = extractelement <32 x float> [[TMP38]], i32 18 +; MAX1024-NEXT: [[TMP57:%.*]] = insertelement <32 x float> [[TMP55]], float [[TMP56]], i32 9 +; MAX1024-NEXT: [[TMP58:%.*]] = extractelement <32 x float> [[TMP38]], i32 19 +; MAX1024-NEXT: [[TMP59:%.*]] = insertelement <32 x float> [[TMP57]], float [[TMP58]], i32 10 +; MAX1024-NEXT: [[TMP60:%.*]] = extractelement <32 x float> [[TMP38]], i32 22 +; MAX1024-NEXT: [[TMP61:%.*]] = insertelement <32 x float> [[TMP59]], float [[TMP60]], i32 11 +; MAX1024-NEXT: [[TMP62:%.*]] = extractelement <32 x float> [[TMP38]], i32 23 +; MAX1024-NEXT: [[TMP63:%.*]] = insertelement <32 x float> [[TMP61]], float [[TMP62]], i32 12 +; MAX1024-NEXT: [[TMP64:%.*]] = extractelement <32 x float> [[TMP38]], i32 26 +; MAX1024-NEXT: [[TMP65:%.*]] = insertelement <32 x float> [[TMP63]], float [[TMP64]], i32 13 +; MAX1024-NEXT: [[TMP66:%.*]] = extractelement <32 x float> [[TMP38]], i32 27 +; MAX1024-NEXT: [[TMP67:%.*]] = insertelement <32 x float> [[TMP65]], float [[TMP66]], i32 14 +; MAX1024-NEXT: [[TMP68:%.*]] = extractelement <32 x float> [[TMP38]], i32 30 +; MAX1024-NEXT: [[TMP69:%.*]] = insertelement <32 x float> [[TMP67]], float [[TMP68]], i32 15 +; MAX1024-NEXT: [[TMP70:%.*]] = extractelement <32 x float> [[TMP38]], i32 31 +; MAX1024-NEXT: [[TMP71:%.*]] = insertelement <32 x float> [[TMP69]], float [[TMP70]], i32 16 +; MAX1024-NEXT: [[SHUFFLE3:%.*]] = shufflevector <32 x float> [[TMP71]], <32 x float> poison, <32 x i32> ; MAX1024-NEXT: switch i32 undef, label [[BB5:%.*]] [ ; MAX1024-NEXT: i32 0, label [[BB2:%.*]] ; MAX1024-NEXT: i32 1, label [[BB3:%.*]] @@ -406,91 +372,64 @@ ; MAX1024: bb3: ; MAX1024-NEXT: br label [[BB2]] ; MAX1024: bb4: -; MAX1024-NEXT: [[TMP87:%.*]] = insertelement <32 x float> [[TMP40]], float [[FVAL]], i32 1 -; MAX1024-NEXT: [[TMP88:%.*]] = insertelement <32 x float> [[TMP87]], float [[FVAL]], i32 2 -; MAX1024-NEXT: [[TMP89:%.*]] = extractelement <32 x float> [[TMP38]], i32 3 -; MAX1024-NEXT: [[TMP90:%.*]] = insertelement <32 x float> [[TMP88]], float [[TMP89]], i32 3 -; MAX1024-NEXT: [[TMP91:%.*]] = insertelement <32 x float> [[TMP90]], float [[TMP45]], i32 4 -; MAX1024-NEXT: [[TMP92:%.*]] = insertelement <32 x float> [[TMP91]], float [[FVAL]], i32 5 -; MAX1024-NEXT: [[TMP93:%.*]] = insertelement <32 x float> [[TMP92]], float [[FVAL]], i32 6 -; MAX1024-NEXT: [[TMP94:%.*]] = extractelement <32 x float> [[TMP38]], i32 7 -; MAX1024-NEXT: [[TMP95:%.*]] = insertelement <32 x float> [[TMP93]], float [[TMP94]], i32 7 -; MAX1024-NEXT: [[TMP96:%.*]] = extractelement <32 x float> [[TMP38]], i32 8 -; MAX1024-NEXT: [[TMP97:%.*]] = insertelement <32 x float> [[TMP95]], float [[TMP96]], i32 8 -; MAX1024-NEXT: [[TMP98:%.*]] = insertelement <32 x float> [[TMP97]], float [[FVAL]], i32 9 -; MAX1024-NEXT: [[TMP99:%.*]] = insertelement <32 x float> [[TMP98]], float [[FVAL]], i32 10 -; MAX1024-NEXT: [[TMP100:%.*]] = insertelement <32 x float> [[TMP99]], float [[TMP55]], i32 11 -; MAX1024-NEXT: [[TMP101:%.*]] = extractelement <32 x float> [[TMP38]], i32 12 -; MAX1024-NEXT: [[TMP102:%.*]] = insertelement <32 x float> [[TMP100]], float [[TMP101]], i32 12 -; MAX1024-NEXT: [[TMP103:%.*]] = insertelement <32 x float> [[TMP102]], float [[FVAL]], i32 13 -; MAX1024-NEXT: [[TMP104:%.*]] = insertelement <32 x float> [[TMP103]], float [[FVAL]], i32 14 -; MAX1024-NEXT: [[TMP105:%.*]] = insertelement <32 x float> [[TMP104]], float [[TMP61]], i32 15 -; MAX1024-NEXT: [[TMP106:%.*]] = extractelement <32 x float> [[TMP38]], i32 16 -; MAX1024-NEXT: [[TMP107:%.*]] = insertelement <32 x float> [[TMP105]], float [[TMP106]], i32 16 -; MAX1024-NEXT: [[TMP108:%.*]] = insertelement <32 x float> [[TMP107]], float [[FVAL]], i32 17 -; MAX1024-NEXT: [[TMP109:%.*]] = insertelement <32 x float> [[TMP108]], float [[FVAL]], i32 18 -; MAX1024-NEXT: [[TMP110:%.*]] = insertelement <32 x float> [[TMP109]], float [[TMP67]], i32 19 -; MAX1024-NEXT: [[TMP111:%.*]] = extractelement <32 x float> [[TMP38]], i32 20 -; MAX1024-NEXT: [[TMP112:%.*]] = insertelement <32 x float> [[TMP110]], float [[TMP111]], i32 20 -; MAX1024-NEXT: [[TMP113:%.*]] = insertelement <32 x float> [[TMP112]], float [[FVAL]], i32 21 -; MAX1024-NEXT: [[TMP114:%.*]] = insertelement <32 x float> [[TMP113]], float [[FVAL]], i32 22 -; MAX1024-NEXT: [[TMP115:%.*]] = insertelement <32 x float> [[TMP114]], float [[TMP73]], i32 23 -; MAX1024-NEXT: [[TMP116:%.*]] = extractelement <32 x float> [[TMP38]], i32 24 -; MAX1024-NEXT: [[TMP117:%.*]] = insertelement <32 x float> [[TMP115]], float [[TMP116]], i32 24 -; MAX1024-NEXT: [[TMP118:%.*]] = insertelement <32 x float> [[TMP117]], float [[FVAL]], i32 25 -; MAX1024-NEXT: [[TMP119:%.*]] = insertelement <32 x float> [[TMP118]], float [[FVAL]], i32 26 -; MAX1024-NEXT: [[TMP120:%.*]] = insertelement <32 x float> [[TMP119]], float [[TMP79]], i32 27 -; MAX1024-NEXT: [[TMP121:%.*]] = extractelement <32 x float> [[TMP38]], i32 28 -; MAX1024-NEXT: [[TMP122:%.*]] = insertelement <32 x float> [[TMP120]], float [[TMP121]], i32 28 -; MAX1024-NEXT: [[TMP123:%.*]] = insertelement <32 x float> [[TMP122]], float [[FVAL]], i32 29 -; MAX1024-NEXT: [[TMP124:%.*]] = insertelement <32 x float> [[TMP123]], float [[FVAL]], i32 30 -; MAX1024-NEXT: [[TMP125:%.*]] = insertelement <32 x float> [[TMP124]], float [[TMP85]], i32 31 +; MAX1024-NEXT: [[TMP72:%.*]] = insertelement <32 x float> poison, float [[TMP40]], i32 0 +; MAX1024-NEXT: [[TMP73:%.*]] = insertelement <32 x float> [[TMP72]], float [[FVAL]], i32 1 +; MAX1024-NEXT: [[TMP74:%.*]] = extractelement <32 x float> [[TMP38]], i32 3 +; MAX1024-NEXT: [[TMP75:%.*]] = insertelement <32 x float> [[TMP73]], float [[TMP74]], i32 2 +; MAX1024-NEXT: [[TMP76:%.*]] = insertelement <32 x float> [[TMP75]], float [[TMP44]], i32 3 +; MAX1024-NEXT: [[TMP77:%.*]] = extractelement <32 x float> [[TMP38]], i32 7 +; MAX1024-NEXT: [[TMP78:%.*]] = insertelement <32 x float> [[TMP76]], float [[TMP77]], i32 4 +; MAX1024-NEXT: [[TMP79:%.*]] = extractelement <32 x float> [[TMP38]], i32 8 +; MAX1024-NEXT: [[TMP80:%.*]] = insertelement <32 x float> [[TMP78]], float [[TMP79]], i32 5 +; MAX1024-NEXT: [[TMP81:%.*]] = insertelement <32 x float> [[TMP80]], float [[TMP50]], i32 6 +; MAX1024-NEXT: [[TMP82:%.*]] = extractelement <32 x float> [[TMP38]], i32 12 +; MAX1024-NEXT: [[TMP83:%.*]] = insertelement <32 x float> [[TMP81]], float [[TMP82]], i32 7 +; MAX1024-NEXT: [[TMP84:%.*]] = insertelement <32 x float> [[TMP83]], float [[TMP54]], i32 8 +; MAX1024-NEXT: [[TMP85:%.*]] = extractelement <32 x float> [[TMP38]], i32 16 +; MAX1024-NEXT: [[TMP86:%.*]] = insertelement <32 x float> [[TMP84]], float [[TMP85]], i32 9 +; MAX1024-NEXT: [[TMP87:%.*]] = insertelement <32 x float> [[TMP86]], float [[TMP58]], i32 10 +; MAX1024-NEXT: [[TMP88:%.*]] = extractelement <32 x float> [[TMP38]], i32 20 +; MAX1024-NEXT: [[TMP89:%.*]] = insertelement <32 x float> [[TMP87]], float [[TMP88]], i32 11 +; MAX1024-NEXT: [[TMP90:%.*]] = insertelement <32 x float> [[TMP89]], float [[TMP62]], i32 12 +; MAX1024-NEXT: [[TMP91:%.*]] = extractelement <32 x float> [[TMP38]], i32 24 +; MAX1024-NEXT: [[TMP92:%.*]] = insertelement <32 x float> [[TMP90]], float [[TMP91]], i32 13 +; MAX1024-NEXT: [[TMP93:%.*]] = insertelement <32 x float> [[TMP92]], float [[TMP66]], i32 14 +; MAX1024-NEXT: [[TMP94:%.*]] = extractelement <32 x float> [[TMP38]], i32 28 +; MAX1024-NEXT: [[TMP95:%.*]] = insertelement <32 x float> [[TMP93]], float [[TMP94]], i32 15 +; MAX1024-NEXT: [[TMP96:%.*]] = insertelement <32 x float> [[TMP95]], float [[TMP70]], i32 16 +; MAX1024-NEXT: [[SHUFFLE1:%.*]] = shufflevector <32 x float> [[TMP96]], <32 x float> poison, <32 x i32> ; MAX1024-NEXT: br label [[BB2]] ; MAX1024: bb5: -; MAX1024-NEXT: [[TMP126:%.*]] = insertelement <32 x float> [[TMP5]], float [[TMP41]], i32 1 -; MAX1024-NEXT: [[TMP127:%.*]] = insertelement <32 x float> [[TMP126]], float [[FVAL]], i32 2 -; MAX1024-NEXT: [[TMP128:%.*]] = extractelement <32 x float> [[TMP38]], i32 3 -; MAX1024-NEXT: [[TMP129:%.*]] = insertelement <32 x float> [[TMP127]], float [[TMP128]], i32 3 -; MAX1024-NEXT: [[TMP130:%.*]] = insertelement <32 x float> [[TMP129]], float [[FVAL]], i32 4 -; MAX1024-NEXT: [[TMP131:%.*]] = insertelement <32 x float> [[TMP130]], float [[TMP47]], i32 5 -; MAX1024-NEXT: [[TMP132:%.*]] = insertelement <32 x float> [[TMP131]], float [[FVAL]], i32 6 -; MAX1024-NEXT: [[TMP133:%.*]] = extractelement <32 x float> [[TMP38]], i32 7 -; MAX1024-NEXT: [[TMP134:%.*]] = insertelement <32 x float> [[TMP132]], float [[TMP133]], i32 7 -; MAX1024-NEXT: [[TMP135:%.*]] = extractelement <32 x float> [[TMP38]], i32 8 -; MAX1024-NEXT: [[TMP136:%.*]] = insertelement <32 x float> [[TMP134]], float [[TMP135]], i32 8 -; MAX1024-NEXT: [[TMP137:%.*]] = insertelement <32 x float> [[TMP136]], float [[FVAL]], i32 9 -; MAX1024-NEXT: [[TMP138:%.*]] = insertelement <32 x float> [[TMP137]], float [[TMP53]], i32 10 -; MAX1024-NEXT: [[TMP139:%.*]] = insertelement <32 x float> [[TMP138]], float [[FVAL]], i32 11 -; MAX1024-NEXT: [[TMP140:%.*]] = extractelement <32 x float> [[TMP38]], i32 12 -; MAX1024-NEXT: [[TMP141:%.*]] = insertelement <32 x float> [[TMP139]], float [[TMP140]], i32 12 -; MAX1024-NEXT: [[TMP142:%.*]] = insertelement <32 x float> [[TMP141]], float [[FVAL]], i32 13 -; MAX1024-NEXT: [[TMP143:%.*]] = insertelement <32 x float> [[TMP142]], float [[TMP59]], i32 14 -; MAX1024-NEXT: [[TMP144:%.*]] = insertelement <32 x float> [[TMP143]], float [[FVAL]], i32 15 -; MAX1024-NEXT: [[TMP145:%.*]] = extractelement <32 x float> [[TMP38]], i32 16 -; MAX1024-NEXT: [[TMP146:%.*]] = insertelement <32 x float> [[TMP144]], float [[TMP145]], i32 16 -; MAX1024-NEXT: [[TMP147:%.*]] = insertelement <32 x float> [[TMP146]], float [[FVAL]], i32 17 -; MAX1024-NEXT: [[TMP148:%.*]] = insertelement <32 x float> [[TMP147]], float [[TMP65]], i32 18 -; MAX1024-NEXT: [[TMP149:%.*]] = insertelement <32 x float> [[TMP148]], float [[FVAL]], i32 19 -; MAX1024-NEXT: [[TMP150:%.*]] = extractelement <32 x float> [[TMP38]], i32 20 -; MAX1024-NEXT: [[TMP151:%.*]] = insertelement <32 x float> [[TMP149]], float [[TMP150]], i32 20 -; MAX1024-NEXT: [[TMP152:%.*]] = insertelement <32 x float> [[TMP151]], float [[FVAL]], i32 21 -; MAX1024-NEXT: [[TMP153:%.*]] = insertelement <32 x float> [[TMP152]], float [[TMP71]], i32 22 -; MAX1024-NEXT: [[TMP154:%.*]] = insertelement <32 x float> [[TMP153]], float [[FVAL]], i32 23 -; MAX1024-NEXT: [[TMP155:%.*]] = extractelement <32 x float> [[TMP38]], i32 24 -; MAX1024-NEXT: [[TMP156:%.*]] = insertelement <32 x float> [[TMP154]], float [[TMP155]], i32 24 -; MAX1024-NEXT: [[TMP157:%.*]] = insertelement <32 x float> [[TMP156]], float [[FVAL]], i32 25 -; MAX1024-NEXT: [[TMP158:%.*]] = insertelement <32 x float> [[TMP157]], float [[TMP77]], i32 26 -; MAX1024-NEXT: [[TMP159:%.*]] = insertelement <32 x float> [[TMP158]], float [[FVAL]], i32 27 -; MAX1024-NEXT: [[TMP160:%.*]] = extractelement <32 x float> [[TMP38]], i32 28 -; MAX1024-NEXT: [[TMP161:%.*]] = insertelement <32 x float> [[TMP159]], float [[TMP160]], i32 28 -; MAX1024-NEXT: [[TMP162:%.*]] = insertelement <32 x float> [[TMP161]], float [[FVAL]], i32 29 -; MAX1024-NEXT: [[TMP163:%.*]] = insertelement <32 x float> [[TMP162]], float [[TMP83]], i32 30 -; MAX1024-NEXT: [[TMP164:%.*]] = insertelement <32 x float> [[TMP163]], float [[FVAL]], i32 31 +; MAX1024-NEXT: [[TMP97:%.*]] = insertelement <32 x float> [[TMP5]], float [[TMP42]], i32 1 +; MAX1024-NEXT: [[TMP98:%.*]] = extractelement <32 x float> [[TMP38]], i32 3 +; MAX1024-NEXT: [[TMP99:%.*]] = insertelement <32 x float> [[TMP97]], float [[TMP98]], i32 2 +; MAX1024-NEXT: [[TMP100:%.*]] = insertelement <32 x float> [[TMP99]], float [[TMP46]], i32 3 +; MAX1024-NEXT: [[TMP101:%.*]] = extractelement <32 x float> [[TMP38]], i32 7 +; MAX1024-NEXT: [[TMP102:%.*]] = insertelement <32 x float> [[TMP100]], float [[TMP101]], i32 4 +; MAX1024-NEXT: [[TMP103:%.*]] = extractelement <32 x float> [[TMP38]], i32 8 +; MAX1024-NEXT: [[TMP104:%.*]] = insertelement <32 x float> [[TMP102]], float [[TMP103]], i32 5 +; MAX1024-NEXT: [[TMP105:%.*]] = insertelement <32 x float> [[TMP104]], float [[TMP48]], i32 6 +; MAX1024-NEXT: [[TMP106:%.*]] = extractelement <32 x float> [[TMP38]], i32 12 +; MAX1024-NEXT: [[TMP107:%.*]] = insertelement <32 x float> [[TMP105]], float [[TMP106]], i32 7 +; MAX1024-NEXT: [[TMP108:%.*]] = insertelement <32 x float> [[TMP107]], float [[TMP52]], i32 8 +; MAX1024-NEXT: [[TMP109:%.*]] = extractelement <32 x float> [[TMP38]], i32 16 +; MAX1024-NEXT: [[TMP110:%.*]] = insertelement <32 x float> [[TMP108]], float [[TMP109]], i32 9 +; MAX1024-NEXT: [[TMP111:%.*]] = insertelement <32 x float> [[TMP110]], float [[TMP56]], i32 10 +; MAX1024-NEXT: [[TMP112:%.*]] = extractelement <32 x float> [[TMP38]], i32 20 +; MAX1024-NEXT: [[TMP113:%.*]] = insertelement <32 x float> [[TMP111]], float [[TMP112]], i32 11 +; MAX1024-NEXT: [[TMP114:%.*]] = insertelement <32 x float> [[TMP113]], float [[TMP60]], i32 12 +; MAX1024-NEXT: [[TMP115:%.*]] = extractelement <32 x float> [[TMP38]], i32 24 +; MAX1024-NEXT: [[TMP116:%.*]] = insertelement <32 x float> [[TMP114]], float [[TMP115]], i32 13 +; MAX1024-NEXT: [[TMP117:%.*]] = insertelement <32 x float> [[TMP116]], float [[TMP64]], i32 14 +; MAX1024-NEXT: [[TMP118:%.*]] = extractelement <32 x float> [[TMP38]], i32 28 +; MAX1024-NEXT: [[TMP119:%.*]] = insertelement <32 x float> [[TMP117]], float [[TMP118]], i32 15 +; MAX1024-NEXT: [[TMP120:%.*]] = insertelement <32 x float> [[TMP119]], float [[TMP68]], i32 16 +; MAX1024-NEXT: [[SHUFFLE2:%.*]] = shufflevector <32 x float> [[TMP120]], <32 x float> poison, <32 x i32> ; MAX1024-NEXT: br label [[BB2]] ; MAX1024: bb2: -; MAX1024-NEXT: [[TMP165:%.*]] = phi <32 x float> [ [[TMP38]], [[BB3]] ], [ [[TMP125]], [[BB4]] ], [ [[TMP164]], [[BB5]] ], [ [[TMP86]], [[BB1]] ] -; MAX1024-NEXT: [[TMP166:%.*]] = extractelement <32 x float> [[TMP165]], i32 30 -; MAX1024-NEXT: store float [[TMP166]], float* undef, align 4 +; MAX1024-NEXT: [[TMP121:%.*]] = phi <32 x float> [ [[TMP38]], [[BB3]] ], [ [[SHUFFLE1]], [[BB4]] ], [ [[SHUFFLE2]], [[BB5]] ], [ [[SHUFFLE3]], [[BB1]] ] +; MAX1024-NEXT: [[TMP122:%.*]] = extractelement <32 x float> [[TMP121]], i32 30 +; MAX1024-NEXT: store float [[TMP122]], float* undef, align 4 ; MAX1024-NEXT: ret void ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll --- a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll @@ -43,8 +43,8 @@ define void @test2() { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> ) -; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <4 x i32> undef, [[TMP0]] +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <4 x i32> poison, [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP2]], i32 77) ; CHECK-NEXT: [[E:%.*]] = icmp ugt i32 [[TMP3]], 1