diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -679,6 +679,8 @@ VectorizableTree.clear(); ScalarToTreeEntry.clear(); MustGather.clear(); + GatheredLoads.clear(); + GatheredLoadsEntriesFirst = -1; ExternalUses.clear(); for (auto &Iter : BlocksSchedules) { BlockScheduling *BS = Iter.second.get(); @@ -1882,12 +1884,18 @@ /// Maps a specific scalar to its tree entry. SmallDenseMap ScalarToTreeEntry; - /// Maps a value to the proposed vectorizable size. + /// Maps a value to the proposed vectorizable size. SmallDenseMap InstrElementSize; /// A list of scalars that we found that we need to keep as scalars. ValueSet MustGather; + /// A list of loads to be gathered during the vectorization process. We can + /// try to vectorize them at the end, if profitable. + SmallVector GatheredLoads; + /// The index of the first gathered load entry in the VectorizeTree. + int GatheredLoadsEntriesFirst = -1; + /// This POD struct describes one external user in the vectorized tree. struct ExternalUser { ExternalUser(Value *S, llvm::User *U, int L) @@ -2986,6 +2994,104 @@ if (!allSameType(Roots)) return; buildTree_rec(Roots, 0, EdgeInfo()); + // Try to vectorize gathered loads. + if (!GatheredLoads.empty() && !isTreeTinyAndNotFullyVectorizable()) { + GatheredLoadsEntriesFirst = VectorizableTree.size(); + SmallDenseMap GatherPointers; + for (LoadInst *LI : GatheredLoads) + GatherPointers.try_emplace(LI, + getUnderlyingObject(LI->getPointerOperand())); + + // Sort by type, base pointers and parents. + auto &&LoadSorter = [this, &GatherPointers](LoadInst *V, LoadInst *V2) { + DomTreeNodeBase *NodeI1 = DT->getNode(V->getParent()); + DomTreeNodeBase *NodeI2 = DT->getNode(V2->getParent()); + assert(NodeI1 && "Should only process reachable instructions"); + assert(NodeI2 && "Should only process reachable instructions"); + assert((NodeI1 == NodeI2) == + (NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) && + "Different nodes should have different DFS numbers"); + return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn() || + (V->getParent() == V2->getParent() && + V->getPointerOperand()->getType()->getTypeID() < + V2->getPointerOperand()->getType()->getTypeID()) || + (V->getParent() == V2->getParent() && + V->getPointerOperand()->getType() == + V2->getPointerOperand()->getType() && + GatherPointers[V] < GatherPointers[V2]); + }; + + sort(GatheredLoads, LoadSorter); + + // Try to vectorize elements based on their types, bases and parents. + for (auto IncIt = GatheredLoads.begin(), E = GatheredLoads.end(); + IncIt != E;) { + + // Look for the next elements with the same type. + auto *SameTypeIt = IncIt; + Type *EltTy = (*IncIt)->getPointerOperand()->getType(); + Value *Ptr = GatherPointers[*IncIt]; + + SetVector Set(IncIt, SameTypeIt); + while (SameTypeIt != E && + (*SameTypeIt)->getParent() == (*IncIt)->getParent() && + (*SameTypeIt)->getPointerOperand()->getType() == EltTy && + Ptr == GatherPointers[*SameTypeIt]) { + if (!getTreeEntry(*SameTypeIt)) + Set.insert(*SameTypeIt); + ++SameTypeIt; + } + + ArrayRef Loads = Set.getArrayRef(); + int NumElts = Loads.size(); + if (NumElts >= 4 || + (NumElts == 2 && all_of(Loads, [this](LoadInst *LI) { + return LI->hasOneUse() || + (std::distance(LI->user_begin(), LI->user_end()) == + LI->getNumUses() && + all_of(LI->users(), + [this](User *U) { return getTreeEntry(U); })); + }))) { + SmallVector Pointers(NumElts); + for (int I = 0; I < NumElts; ++I) + Pointers[I] = Loads[I]->getPointerOperand(); + SmallVector SortedIndicies; + Type *ScalarTy = Loads.front()->getType(); + if (sortPtrAccesses(Pointers, ScalarTy, *DL, *SE, SortedIndicies)) { + if (SortedIndicies.empty()) { + SortedIndicies.assign(NumElts, 0); + std::iota(SortedIndicies.begin(), SortedIndicies.end(), 0); + } + Optional Diff = getPointersDiff( + ScalarTy, Pointers[SortedIndicies.front()], ScalarTy, + Pointers[SortedIndicies.back()], *DL, *SE); + int MaxLoads = std::max(getMaxVecRegSize() / DL->getTypeSizeInBits( + Loads[0]->getType()), + Roots.size()) * + (NumElts >= 4 ? 1 : 2); + if (Diff && *Diff < MaxLoads) { + SmallVector Values( + PowerOf2Ceil(*Diff + 1), UndefValue::get((*IncIt)->getType())); + // Sort loads. + Values[0] = Loads[SortedIndicies.front()]; + for (int I = 1; I < NumElts; ++I) { + Optional Diff = getPointersDiff( + ScalarTy, Pointers[SortedIndicies.front()], ScalarTy, + Pointers[SortedIndicies[I]], *DL, *SE); + Values[*Diff] = Loads[SortedIndicies[I]]; + } + LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize gathered loads (" + << NumElts << ")\n"); + + buildTree_rec(Values, 0, EdgeInfo()); + } + } + } + + // Start over at the next instruction of a different type (or the end). + IncIt = SameTypeIt; + } + } } namespace { @@ -3089,6 +3195,18 @@ Order[MaskedIndices[I]] = AvailableIndices[I]; } +/// Tries to find subvector of loads and builds new vector of only loads if can +/// be profitable. +static void +gatherPossiblyVectorizableLoads(const BoUpSLP &R, ArrayRef VL, + SmallVectorImpl &GatheredLoads) { + for (Value *V : VL) { + if (auto *LI = dyn_cast(V)) + if (!R.isDeleted(LI) && isValidElementType(LI->getType())) + GatheredLoads.push_back(LI); + } +} + void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, const EdgeInfo &UserTreeIdx) { assert((allConstant(VL) || allSameType(VL)) && "Invalid types!"); @@ -3096,6 +3214,7 @@ InstructionsState S = getSameOpcode(VL); if (Depth == RecursionMaxDepth) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n"); + gatherPossiblyVectorizableLoads(*this, VL, GatheredLoads); newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } @@ -3126,6 +3245,7 @@ // If all of the operands are identical or constant we have a simple solution. if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode()) { + gatherPossiblyVectorizableLoads(*this, VL, GatheredLoads); LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; @@ -3177,7 +3297,7 @@ // we need to gather the scalars. // The reduction nodes (stored in UserIgnoreList) also should stay scalar. for (Value *V : VL) { - if (MustGather.count(V) || is_contained(UserIgnoreList, V)) { + if (is_contained(UserIgnoreList, V)) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; @@ -3214,6 +3334,7 @@ LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n"); if (NumUniqueScalarValues <= 1 || !llvm::isPowerOf2_32(NumUniqueScalarValues)) { + gatherPossiblyVectorizableLoads(*this, VL, GatheredLoads); LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; @@ -3599,13 +3720,33 @@ ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n"); TE->setOperandsInOrder(); - for (unsigned i = 0, e = 2; i < e; ++i) { + for (unsigned I = 0, E = 2; I < E; ++I) { ValueList Operands; - // Prepare the operand vector. - for (Value *V : VL) - Operands.push_back(cast(V)->getOperand(i)); + if (I >= 1) { + // Need to cast all elements to the same type before vectorization to + // avoid crash. + Type *VL0Ty = VL0->getOperand(I)->getType(); + Type *Ty = + llvm::all_of(VL, + [VL0Ty](Value *V) { return VL0Ty == V->getType(); }) + ? VL0Ty + : DL->getIndexType(cast(VL0) + ->getPointerOperandType() + ->getScalarType()); + // Prepare the operand vector. + for (Value *V : VL) { + auto *Op = cast(V)->getOperand(I); + auto *CI = cast(Op); + Operands.push_back(ConstantExpr::getIntegerCast( + CI, Ty, CI->getValue().isSignBitSet())); + } + } else { + // Prepare the operand vector. + for (Value *V : VL) + Operands.push_back(cast(V)->getOperand(I)); + } - buildTree_rec(Operands, Depth + 1, {TE, i}); + buildTree_rec(Operands, Depth + 1, {TE, I}); } return; } @@ -4934,6 +5075,20 @@ for (unsigned I = 0, E = VectorizableTree.size(); I < E; ++I) { TreeEntry &TE = *VectorizableTree[I].get(); + // Exclude cost of gather loads nodes which are not used. + if (GatheredLoadsEntriesFirst >= 0 && + I >= static_cast(GatheredLoadsEntriesFirst) && + TE.State == TreeEntry::NeedToGather) { + assert(all_of(TE.Scalars, + [this](Value *V) { + return (isa(V) && MustGather.contains(V)) || + isa(V) || + V->getType()->isPtrOrPtrVectorTy(); + }) && + "Expected loads, pointers or constants only."); + continue; + } + InstructionCost C = getEntryCost(&TE, VectorizedVals); Cost += C; LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C @@ -5163,6 +5318,12 @@ } } + if (UsedTEs.empty()) { + assert(all_of(TE->Scalars, UndefValue::classof) && + "Expected vector of undefs only."); + return None; + } + unsigned VF = 0; if (UsedTEs.size() == 1) { // Try to find the perfect match in another gather node at first. @@ -5510,66 +5671,12 @@ } } - // Check that every instruction appears once in this bundle. - SmallVector ReuseShuffleIndicies; - SmallVector UniqueValues; - if (VL.size() > 2) { - DenseMap UniquePositions; - unsigned NumValues = - std::distance(VL.begin(), find_if(reverse(VL), [](Value *V) { - return !isa(V); - }).base()); - VF = std::max(VF, PowerOf2Ceil(NumValues)); - int UniqueVals = 0; - bool HasUndefs = false; - for (Value *V : VL.drop_back(VL.size() - VF)) { - if (isa(V)) { - ReuseShuffleIndicies.emplace_back(UndefMaskElem); - HasUndefs = true; - continue; - } - if (isConstant(V)) { - ReuseShuffleIndicies.emplace_back(UniqueValues.size()); - UniqueValues.emplace_back(V); - continue; - } - auto Res = UniquePositions.try_emplace(V, UniqueValues.size()); - ReuseShuffleIndicies.emplace_back(Res.first->second); - if (Res.second) { - UniqueValues.emplace_back(V); - ++UniqueVals; - } - } - if (HasUndefs && UniqueVals == 1 && UniqueValues.size() == 1) { - // Emit pure splat vector. - // FIXME: why it is not identified as an identity. - unsigned NumUndefs = count(ReuseShuffleIndicies, UndefMaskElem); - if (NumUndefs == ReuseShuffleIndicies.size() - 1) - ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(), - UndefMaskElem); - else - ReuseShuffleIndicies.assign(VF, 0); - } else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) { - ReuseShuffleIndicies.clear(); - UniqueValues.clear(); - UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues)); - } - UniqueValues.append(VF - UniqueValues.size(), - PoisonValue::get(VL[0]->getType())); - VL = UniqueValues; - } - - ShuffleInstructionBuilder ShuffleBuilder(Builder, VF); - Value *Vec = gather(VL); - if (!ReuseShuffleIndicies.empty()) { - ShuffleBuilder.addMask(ReuseShuffleIndicies); - Vec = ShuffleBuilder.finalize(Vec); - if (auto *I = dyn_cast(Vec)) { - GatherSeq.insert(I); - CSEBlocks.insert(I->getParent()); - } - } - return Vec; + auto *I = + find_if(VectorizableTree, [VL](const std::unique_ptr &TE) { + return TE->State == TreeEntry::NeedToGather && TE->isSame(VL); + }); + assert(I != VectorizableTree.end() && "Gather node is not in the graph."); + return vectorizeTree(I->get()); } Value *BoUpSLP::vectorizeTree(TreeEntry *E) { @@ -5586,7 +5693,11 @@ VF = E->ReuseShuffleIndices.size(); ShuffleInstructionBuilder ShuffleBuilder(Builder, VF); if (E->State == TreeEntry::NeedToGather) { - setInsertPointAfterBundle(E); + // Can set insert point safely on for the initial gather node. + if (E == VectorizableTree.front().get()) + setInsertPointAfterBundle(E); + SmallVector ReuseShuffleIndicies(E->ReuseShuffleIndices.begin(), + E->ReuseShuffleIndices.end()); Value *Vec; SmallVector Mask; SmallVector Entries; @@ -5595,13 +5706,56 @@ if (Shuffle.hasValue()) { assert((Entries.size() == 1 || Entries.size() == 2) && "Expected shuffle of 1 or 2 entries."); - Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue, + if (Entries.size() == 1) + Vec = + Builder.CreateShuffleVector(Entries.front()->VectorizedValue, Mask); + else + Vec = + Builder.CreateShuffleVector(Entries.front()->VectorizedValue, Entries.back()->VectorizedValue, Mask); } else { - Vec = gather(E->Scalars); + Type *ScalarTy = E->Scalars.front()->getType(); + SmallVector VL(VF, PoisonValue::get(ScalarTy)); + copy(E->Scalars, VL.begin()); + // Check that every instruction appears once in this bundle. + SmallVector UniqueValues; + if (VL.size() > 2 && !NeedToShuffleReuses) { + DenseMap UniquePositions; + int UniqueVals = 0; + for (Value *V : VL) { + if (isa(V) && !isa(V)) { + ReuseShuffleIndicies.emplace_back(UndefMaskElem); + continue; + } + if (isConstant(V)) { + ReuseShuffleIndicies.emplace_back(UniqueValues.size()); + UniqueValues.emplace_back(V); + continue; + } + auto Res = UniquePositions.try_emplace(V, UniqueValues.size()); + ReuseShuffleIndicies.emplace_back(Res.first->second); + if (Res.second) { + UniqueValues.emplace_back(V); + ++UniqueVals; + } + } + if (UniqueVals == 1 && UniqueValues.size() == 1) { + // Emit pure splat vector. + ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(), + UndefMaskElem); + } else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) { + ReuseShuffleIndicies.clear(); + UniqueValues.swap(VL); + } + UniqueValues.append(VF - UniqueValues.size(), + PoisonValue::get(ScalarTy)); + VL.swap(UniqueValues); + } + + Vec = gather(VL); } - if (NeedToShuffleReuses) { - ShuffleBuilder.addMask(E->ReuseShuffleIndices); + if (!ReuseShuffleIndicies.empty()) { + ShuffleBuilder.addMask(ReuseShuffleIndicies); Vec = ShuffleBuilder.finalize(Vec); if (auto *I = dyn_cast(Vec)) { GatherSeq.insert(I); @@ -6168,6 +6322,14 @@ scheduleBlock(BSIter.second.get()); } + // Need to vectorize gathered loads independently since there are no direct + // users, only indirect ones, represented by gathered nodes. + for (const std::unique_ptr &TE : VectorizableTree) { + if (TE->getOpcode() == Instruction::Load && + TE->State != TreeEntry::NeedToGather && TE->UserTreeIndices.empty() && + TE.get() != VectorizableTree.front().get()) + TE->VectorizedValue = vectorizeTree(TE.get()); + } Builder.SetInsertPoint(&F->getEntryBlock().front()); auto *VectorRoot = vectorizeTree(VectorizableTree[0].get()); @@ -6398,7 +6560,8 @@ Instruction *In = &*it++; if (isDeleted(In)) continue; - if (!isa(In) && !isa(In)) + if (!isa(In) && !isa(In) && + !isa(In)) continue; // Check if we can replace this instruction with any of the diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll @@ -10,9 +10,9 @@ ; CHECK-LABEL: @f_noalias( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SCALE:%.*]] = getelementptr inbounds [[STRUCT_WEIGHT_T:%.*]], %struct.weight_t* [[W:%.*]], i64 0, i32 0 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[SCALE]], align 16 ; CHECK-NEXT: [[OFFSET:%.*]] = getelementptr inbounds [[STRUCT_WEIGHT_T]], %struct.weight_t* [[W]], i64 0, i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[OFFSET]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SCALE]] to <2 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 16 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 1 ; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i8, i8* [[DST:%.*]], i64 1 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 2 @@ -21,24 +21,18 @@ ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to <4 x i8>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* [[TMP2]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP0]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP4]] -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP1]], i32 2 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP1]], i32 3 -; CHECK-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP9]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult <4 x i32> [[TMP14]], -; CHECK-NEXT: [[TMP16:%.*]] = icmp sgt <4 x i32> [[TMP14]], zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = sext <4 x i1> [[TMP16]] to <4 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> [[TMP14]], <4 x i32> [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = trunc <4 x i32> [[TMP18]] to <4 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <4 x i32> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <4 x i32> [[TMP8]], +; CHECK-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP8]], <4 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i32> [[TMP12]] to <4 x i8> ; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 3 -; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8* [[DST]] to <4 x i8>* -; CHECK-NEXT: store <4 x i8> [[TMP19]], <4 x i8>* [[TMP20]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8* [[DST]] to <4 x i8>* +; CHECK-NEXT: store <4 x i8> [[TMP13]], <4 x i8>* [[TMP14]], align 1 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll @@ -25,14 +25,14 @@ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX43]] to <4 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> poison, <4 x double> zeroinitializer, <4 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> undef, <4 x double> zeroinitializer, <4 x double> [[TMP2]]) ; CHECK-NEXT: br label [[SW_EPILOG:%.*]] ; CHECK: sw.bb195: ; CHECK-NEXT: br label [[SW_EPILOG]] ; CHECK: do.body: ; CHECK-NEXT: unreachable ; CHECK: sw.epilog: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x double> [ poison, [[SW_BB195]] ], [ [[TMP3]], [[SW_BB]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x double> [ undef, [[SW_BB195]] ], [ [[TMP3]], [[SW_BB]] ] ; CHECK-NEXT: ret i32 undef ; CHECK: if.end.1: ; CHECK-NEXT: br label [[FOR_COND15_1:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll @@ -149,7 +149,7 @@ ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_0]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> undef, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP5]], [[TMP9]] ; CHECK-NEXT: [[TMP2_11:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll @@ -149,7 +149,7 @@ ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_0]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> undef, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP5]], [[TMP9]] ; CHECK-NEXT: [[TMP2_11:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll @@ -204,16 +204,10 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i8> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[X:%.*]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i8> [[TMP3]], i8 [[X]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i8> [[TMP4]], i8 [[X]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> [[TMP5]], i8 [[X]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[TMP6]], i8 [[X]], i32 4 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[X]], i32 5 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i8> [[TMP8]], i8 [[X]], i32 6 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[X]], i32 7 -; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP2]], <8 x i8> [[TMP1]], <8 x i8> [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[PTR]] to <8 x i8>* -; CHECK-NEXT: store <8 x i8> [[TMP11]], <8 x i8>* [[TMP12]], align 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP2]], <8 x i8> [[TMP1]], <8 x i8> [[SHUFFLE]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[PTR]] to <8 x i8>* +; CHECK-NEXT: store <8 x i8> [[TMP4]], <8 x i8>* [[TMP5]], align 2 ; CHECK-NEXT: ret void ; entry: @@ -280,23 +274,17 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i8> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[X:%.*]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i8> [[TMP3]], i8 [[X]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i8> [[TMP4]], i8 [[X]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> [[TMP5]], i8 [[X]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[TMP6]], i8 [[X]], i32 4 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[X]], i32 5 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i8> [[TMP8]], i8 [[X]], i32 6 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[X]], i32 7 -; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP2]], <8 x i8> [[TMP1]], <8 x i8> [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[PTR]] to <8 x i8>* -; CHECK-NEXT: store <8 x i8> [[TMP11]], <8 x i8>* [[TMP12]], align 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP2]], <8 x i8> [[TMP1]], <8 x i8> [[SHUFFLE]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[PTR]] to <8 x i8>* +; CHECK-NEXT: store <8 x i8> [[TMP4]], <8 x i8>* [[TMP5]], align 2 ; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 8 ; CHECK-NEXT: [[L_8:%.*]] = load i8, i8* [[GEP_8]], align 1 ; CHECK-NEXT: [[CMP_8:%.*]] = icmp ugt i8 [[L_8]], -1 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i8> [[TMP1]], i32 0 -; CHECK-NEXT: [[S_8:%.*]] = select i1 [[CMP_8]], i8 [[TMP13]], i8 [[X]] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i8> [[TMP11]], i32 0 -; CHECK-NEXT: store i8 [[TMP14]], i8* [[GEP_8]], align 2 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i8> [[TMP1]], i32 0 +; CHECK-NEXT: [[S_8:%.*]] = select i1 [[CMP_8]], i8 [[TMP6]], i8 [[X]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i8> [[TMP4]], i32 0 +; CHECK-NEXT: store i8 [[TMP7]], i8* [[GEP_8]], align 2 ; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 9 ; CHECK-NEXT: [[L_9:%.*]] = load i8, i8* [[GEP_9]], align 1 ; CHECK-NEXT: [[CMP_9:%.*]] = icmp ugt i8 [[L_9]], -1 @@ -444,12 +432,10 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i16> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[X:%.*]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[X]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[X]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i16> [[TMP5]], i16 [[X]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP1]], <4 x i16> [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[PTR]] to <4 x i16>* -; CHECK-NEXT: store <4 x i16> [[TMP7]], <4 x i16>* [[TMP8]], align 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP1]], <4 x i16> [[SHUFFLE]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[PTR]] to <4 x i16>* +; CHECK-NEXT: store <4 x i16> [[TMP4]], <4 x i16>* [[TMP5]], align 2 ; CHECK-NEXT: ret void ; entry: @@ -493,16 +479,10 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[X:%.*]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[X]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[X]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i16> [[TMP5]], i16 [[X]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[X]], i32 4 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i16> [[TMP7]], i16 [[X]], i32 5 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i16> [[TMP8]], i16 [[X]], i32 6 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i16> [[TMP9]], i16 [[X]], i32 7 -; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP11]], <8 x i16>* [[TMP12]], align 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[SHUFFLE]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[PTR]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP4]], <8 x i16>* [[TMP5]], align 2 ; CHECK-NEXT: ret void ; entry: @@ -594,12 +574,10 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[X]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[X]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> [[SHUFFLE]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 2 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll @@ -491,8 +491,7 @@ ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_2]], i32 5 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_3]], i32 6 ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_4]], i32 7 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP21:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP21:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE2]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]] ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> poison, <9 x i32> ; CHECK-NEXT: [[B_INS_71:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP22]], <9 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll @@ -18,7 +18,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ADD277]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> poison, [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> undef, [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP6]], ; CHECK-NEXT: [[ARRAYIDX372_3:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 3 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX372]] to <4 x i32>* diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll @@ -8,21 +8,19 @@ ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA1:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP6:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP3:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[TMP0]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP6]] = extractelement <4 x i64> [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 -; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32 -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> , [[TMP5]] -; CHECK-NEXT: [[TMP9:%.*]] = ashr exact <4 x i64> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP9]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i64 [[TMP10]], 0 -; CHECK-NEXT: [[OP_EXTRA1]] = add i64 [[OP_EXTRA]], [[TMP6]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i64> [[SHUFFLE]], +; CHECK-NEXT: [[TMP3]] = extractelement <4 x i64> [[TMP2]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP4]], 32 +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> , [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]]) +; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i64 [[TMP7]], 0 +; CHECK-NEXT: [[OP_EXTRA1]] = add i64 [[OP_EXTRA]], [[TMP3]] ; CHECK-NEXT: br label [[LOOP]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll @@ -546,10 +546,8 @@ ; CHECK-NEXT: [[Z:%.*]] = zext i16 [[X]] to i32 ; CHECK-NEXT: [[S:%.*]] = shl nuw nsw i32 [[Z]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[S]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[S]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[S]], i32 3 -; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast ([8 x i32]* @output to <4 x i32>*), align 16 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* bitcast ([8 x i32]* @output to <4 x i32>*), align 16 ; CHECK-NEXT: ret void ; %x = load i16, i16* %p, align 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll @@ -9,15 +9,13 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @bar() ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[N]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[N]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[N]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: ret i32 undef ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll @@ -18,20 +18,16 @@ ; CHECK-NEXT: [[V1:%.*]] = sub i64 [[A0]], 1 ; CHECK-NEXT: [[V2:%.*]] = sub i64 [[B0]], 1 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[V1]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[V1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[V1]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[V1]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> poison, i64 [[V2]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i64> [[TMP4]], i64 [[V2]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i64> [[TMP5]], i64 [[V2]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i64> [[TMP6]], i64 [[V2]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> [[TMP3]], [[TMP7]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[V2]], i32 0 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i64> [[SHUFFLE]], [[SHUFFLE1]] ; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds i64, i64* [[S:%.*]], i64 0 ; CHECK-NEXT: [[IDXS1:%.*]] = getelementptr inbounds i64, i64* [[S]], i64 1 ; CHECK-NEXT: [[IDXS2:%.*]] = getelementptr inbounds i64, i64* [[S]], i64 2 ; CHECK-NEXT: [[IDXS3:%.*]] = getelementptr inbounds i64, i64* [[S]], i64 3 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64* [[IDXS0]] to <4 x i64>* -; CHECK-NEXT: store <4 x i64> [[TMP8]], <4 x i64>* [[TMP9]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[IDXS0]] to <4 x i64>* +; CHECK-NEXT: store <4 x i64> [[TMP2]], <4 x i64>* [[TMP3]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -81,16 +77,14 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[D0]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[V1]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V1]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[V1]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[V1]], i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[TMP4]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[SHUFFLE]], [[TMP4]] ; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 0 ; CHECK-NEXT: [[IDXS1:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 1 ; CHECK-NEXT: [[IDXS2:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 2 ; CHECK-NEXT: [[IDXS3:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 3 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[IDXS0]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP10]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[IDXS0]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll @@ -52,26 +52,12 @@ ; ; AVX-LABEL: @splat( ; AVX-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0 -; AVX-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[C]], i32 1 -; AVX-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[TMP2]], i8 [[C]], i32 2 -; AVX-NEXT: [[TMP4:%.*]] = insertelement <16 x i8> [[TMP3]], i8 [[C]], i32 3 -; AVX-NEXT: [[TMP5:%.*]] = insertelement <16 x i8> [[TMP4]], i8 [[C]], i32 4 -; AVX-NEXT: [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 [[C]], i32 5 -; AVX-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[TMP6]], i8 [[C]], i32 6 -; AVX-NEXT: [[TMP8:%.*]] = insertelement <16 x i8> [[TMP7]], i8 [[C]], i32 7 -; AVX-NEXT: [[TMP9:%.*]] = insertelement <16 x i8> [[TMP8]], i8 [[C]], i32 8 -; AVX-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> [[TMP9]], i8 [[C]], i32 9 -; AVX-NEXT: [[TMP11:%.*]] = insertelement <16 x i8> [[TMP10]], i8 [[C]], i32 10 -; AVX-NEXT: [[TMP12:%.*]] = insertelement <16 x i8> [[TMP11]], i8 [[C]], i32 11 -; AVX-NEXT: [[TMP13:%.*]] = insertelement <16 x i8> [[TMP12]], i8 [[C]], i32 12 -; AVX-NEXT: [[TMP14:%.*]] = insertelement <16 x i8> [[TMP13]], i8 [[C]], i32 13 -; AVX-NEXT: [[TMP15:%.*]] = insertelement <16 x i8> [[TMP14]], i8 [[C]], i32 14 -; AVX-NEXT: [[TMP16:%.*]] = insertelement <16 x i8> [[TMP15]], i8 [[C]], i32 15 -; AVX-NEXT: [[TMP17:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0 -; AVX-NEXT: [[TMP18:%.*]] = insertelement <16 x i8> [[TMP17]], i8 [[B:%.*]], i32 1 -; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[TMP18]], <16 x i8> poison, <16 x i32> -; AVX-NEXT: [[TMP19:%.*]] = xor <16 x i8> [[TMP16]], [[SHUFFLE]] -; AVX-NEXT: store <16 x i8> [[TMP19]], <16 x i8>* bitcast ([32 x i8]* @cle to <16 x i8>*), align 16 +; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> zeroinitializer +; AVX-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0 +; AVX-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[TMP2]], i8 [[B:%.*]], i32 1 +; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> +; AVX-NEXT: [[TMP4:%.*]] = xor <16 x i8> [[SHUFFLE]], [[SHUFFLE1]] +; AVX-NEXT: store <16 x i8> [[TMP4]], <16 x i8>* bitcast ([32 x i8]* @cle to <16 x i8>*), align 16 ; AVX-NEXT: ret void ; %1 = xor i8 %c, %a @@ -130,19 +116,15 @@ ; ; AVX-LABEL: @same_opcode_on_one_side( ; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0 -; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[C]], i32 1 -; AVX-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[C]], i32 2 -; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[C]], i32 3 -; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0 -; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 1 -; AVX-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[A]], i32 2 -; AVX-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[A]], i32 3 -; AVX-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP4]], [[TMP8]] -; AVX-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[B:%.*]], i32 1 -; AVX-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[C]], i32 2 -; AVX-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[A]], i32 3 -; AVX-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP9]], [[TMP12]] -; AVX-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16 +; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer +; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0 +; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer +; AVX-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i32 1 +; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[C]], i32 2 +; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 3 +; AVX-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP3]], [[TMP6]] +; AVX-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16 ; AVX-NEXT: ret void ; %add1 = add i32 %c, %a diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll @@ -29,13 +29,13 @@ ; CHECK-NEXT: [[IXX22:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 ; CHECK-NEXT: [[IX2:%.*]] = fmul double [[TMP8]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> , double [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> , double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[TMP13]], [[TMP14]] ; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [ ; CHECK-NEXT: i32 0, label [[BB2:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll @@ -129,11 +129,10 @@ ; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds double, double* [[INBUF:%.*]], i64 1 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[INBUF]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll @@ -110,17 +110,9 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 undef, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]] ; CHECK: if.then38: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> , double undef, i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> undef, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> undef, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> undef, [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> undef, [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> undef, [[TMP6]] ; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY_5_11_53_95_137_191_197_203_239_257_263_269_275_281_287_293_383_437_443_455_461_599_601:%.*]], %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>* +; CHECK-NEXT: store <2 x double> undef, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: br label [[RETURN:%.*]] ; CHECK: if.then78: ; CHECK-NEXT: br label [[RETURN]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll @@ -75,13 +75,11 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> poison, double [[CONV]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[CONV]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[CONV]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[CONV]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP6]], [[TMP2]] -; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[A]] to <4 x double>* -; CHECK-NEXT: store <4 x double> [[TMP8]], <4 x double>* [[TMP9]], align 8 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x double> [[SHUFFLE]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[A]] to <4 x double>* +; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[TMP6]], align 8 ; CHECK-NEXT: ret i32 undef ; entry: @@ -209,13 +207,11 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> poison, double [[CONV]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[CONV]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[CONV]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[CONV]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP6]], [[TMP2]] -; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[A]] to <4 x double>* -; CHECK-NEXT: store <4 x double> [[TMP8]], <4 x double>* [[TMP9]], align 8 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x double> [[SHUFFLE]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[A]] to <4 x double>* +; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[TMP6]], align 8 ; CHECK-NEXT: ret i32 undef ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll b/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll @@ -24,13 +24,11 @@ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP1]], [[TMP5]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[SHUFFLE]] ; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[B]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: ret i32 0 ; entry: @@ -77,15 +75,13 @@ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP1]], [[TMP5]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[SHUFFLE]] ; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[B]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 -; CHECK-NEXT: ret i32 [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: ret i32 [[TMP5]] ; entry: %0 = load i32, i32* %A, align 4 @@ -123,15 +119,13 @@ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP1]], [[TMP5]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[SHUFFLE]] ; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[B]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1 -; CHECK-NEXT: ret i32 [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1 +; CHECK-NEXT: ret i32 [[TMP5]] ; entry: %0 = load i32, i32* %A, align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll @@ -8,13 +8,11 @@ ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1 ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[LD]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[LD]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[LD]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP3]], [[TMP3]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]] ; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[B]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll @@ -8,18 +8,16 @@ ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[N]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[N]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[N]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0 -; CHECK-NEXT: [[EXTERNALUSE1:%.*]] = add nsw i32 [[TMP9]], [[M:%.*]] -; CHECK-NEXT: [[EXTERNALUSE2:%.*]] = mul nsw i32 [[TMP9]], [[M]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0 +; CHECK-NEXT: [[EXTERNALUSE1:%.*]] = add nsw i32 [[TMP6]], [[M:%.*]] +; CHECK-NEXT: [[EXTERNALUSE2:%.*]] = mul nsw i32 [[TMP6]], [[M]] ; CHECK-NEXT: [[ADD10:%.*]] = add nsw i32 [[EXTERNALUSE1]], [[EXTERNALUSE2]] ; CHECK-NEXT: ret i32 [[ADD10]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll @@ -82,23 +82,19 @@ ; CHECK-NEXT: ret float [[ADD]] ; ; THRESH1-LABEL: @f_used_twice_in_tree( -; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 -; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; THRESH1-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 -; THRESH1-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[X]], [[TMP3]] -; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 -; THRESH1-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 -; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]] +; THRESH1-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> +; THRESH1-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X]], [[TMP1]] +; THRESH1-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; THRESH1-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] ; THRESH1-NEXT: ret float [[ADD]] ; ; THRESH2-LABEL: @f_used_twice_in_tree( -; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 -; THRESH2-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; THRESH2-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 -; THRESH2-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[X]], [[TMP3]] -; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 -; THRESH2-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 -; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]] +; THRESH2-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> +; THRESH2-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X]], [[TMP1]] +; THRESH2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] ; THRESH2-NEXT: ret float [[ADD]] ; %x0 = extractelement <2 x float> %x, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -1253,40 +1253,32 @@ ; CHECK-LABEL: @wobble( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[ARG]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[ARG]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ARG]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[BAR]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[BAR]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[BAR]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[TMP3]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]] -; CHECK-NEXT: [[OP_EXTRA1:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]] -; CHECK-NEXT: ret i32 [[OP_EXTRA1]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP6]], [[ARG]] +; CHECK-NEXT: [[OP_EXTRA2:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP3]] +; CHECK-NEXT: ret i32 [[OP_EXTRA2]] ; ; THRESHOLD-LABEL: @wobble( ; THRESHOLD-NEXT: bb: ; THRESHOLD-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG:%.*]], i32 0 -; THRESHOLD-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[ARG]], i32 1 -; THRESHOLD-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[ARG]], i32 2 -; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ARG]], i32 3 -; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0 -; THRESHOLD-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[BAR]], i32 1 -; THRESHOLD-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[BAR]], i32 2 -; THRESHOLD-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[BAR]], i32 3 -; THRESHOLD-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[TMP3]], [[TMP7]] -; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 -; THRESHOLD-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer -; THRESHOLD-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> -; THRESHOLD-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) -; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]] -; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]] -; THRESHOLD-NEXT: ret i32 [[OP_EXTRA1]] +; THRESHOLD-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer +; THRESHOLD-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0 +; THRESHOLD-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer +; THRESHOLD-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; THRESHOLD-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 +; THRESHOLD-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer +; THRESHOLD-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32> +; THRESHOLD-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) +; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP6]], [[ARG]] +; THRESHOLD-NEXT: [[OP_EXTRA2:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP3]] +; THRESHOLD-NEXT: ret i32 [[OP_EXTRA2]] ; bb: %x1 = xor i32 %arg, %bar diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -1334,8 +1334,8 @@ define void @PR49730() { ; CHECK-LABEL: @PR49730( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> ) -; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> poison, [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> undef, [[TMP1]] ; CHECK-NEXT: [[T12:%.*]] = sub nsw i32 undef, undef ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[T12]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll @@ -25,21 +25,13 @@ ; SSE-LABEL: @bar( ; SSE-NEXT: entry: ; SSE-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[W:%.*]], i32 0 -; SSE-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[W]], i32 1 -; SSE-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[W]], i32 2 -; SSE-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[W]], i32 3 -; SSE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[W]], i32 0 -; SSE-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[W]], i32 1 -; SSE-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[W]], i32 2 -; SSE-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[W]], i32 3 -; SSE-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[W]], i32 0 -; SSE-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[W]], i32 1 -; SSE-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[W]], i32 2 -; SSE-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[W]], i32 3 -; SSE-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> poison, i32 [[W]], i32 0 -; SSE-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[W]], i32 1 -; SSE-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[W]], i32 2 -; SSE-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[W]], i32 3 +; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer +; SSE-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[W]], i32 0 +; SSE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer +; SSE-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[W]], i32 0 +; SSE-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer +; SSE-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[W]], i32 0 +; SSE-NEXT: [[SHUFFLE3:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer ; SSE-NEXT: br label [[FOR_BODY:%.*]] ; SSE: for.body: ; SSE-NEXT: [[I_0356:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] @@ -59,25 +51,25 @@ ; SSE-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 2 ; SSE-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 2 ; SSE-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 3 -; SSE-NEXT: [[TMP16:%.*]] = bitcast i8* [[C_ADDR_0352]] to <4 x i8>* -; SSE-NEXT: [[TMP17:%.*]] = load <4 x i8>, <4 x i8>* [[TMP16]], align 1 +; SSE-NEXT: [[TMP4:%.*]] = bitcast i8* [[C_ADDR_0352]] to <4 x i8>* +; SSE-NEXT: [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP4]], align 1 ; SSE-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 3 -; SSE-NEXT: [[TMP18:%.*]] = bitcast i8* [[D_ADDR_0353]] to <4 x i8>* -; SSE-NEXT: [[TMP19:%.*]] = load <4 x i8>, <4 x i8>* [[TMP18]], align 1 +; SSE-NEXT: [[TMP6:%.*]] = bitcast i8* [[D_ADDR_0353]] to <4 x i8>* +; SSE-NEXT: [[TMP7:%.*]] = load <4 x i8>, <4 x i8>* [[TMP6]], align 1 ; SSE-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 3 -; SSE-NEXT: [[TMP20:%.*]] = bitcast i8* [[A_ADDR_0355]] to <4 x i8>* -; SSE-NEXT: [[TMP21:%.*]] = load <4 x i8>, <4 x i8>* [[TMP20]], align 1 +; SSE-NEXT: [[TMP8:%.*]] = bitcast i8* [[A_ADDR_0355]] to <4 x i8>* +; SSE-NEXT: [[TMP9:%.*]] = load <4 x i8>, <4 x i8>* [[TMP8]], align 1 ; SSE-NEXT: [[ARRAYIDX40:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 3 -; SSE-NEXT: [[TMP22:%.*]] = bitcast i8* [[B_ADDR_0351]] to <4 x i8>* -; SSE-NEXT: [[TMP23:%.*]] = load <4 x i8>, <4 x i8>* [[TMP22]], align 1 -; SSE-NEXT: [[TMP24:%.*]] = icmp ult <4 x i8> [[TMP17]], [[TMP19]] -; SSE-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP24]], <4 x i8> [[TMP23]], <4 x i8> [[TMP21]] -; SSE-NEXT: [[TMP26:%.*]] = zext <4 x i8> [[TMP25]] to <4 x i32> -; SSE-NEXT: [[TMP27:%.*]] = mul <4 x i32> [[TMP26]], [[TMP3]] -; SSE-NEXT: [[TMP28:%.*]] = trunc <4 x i32> [[TMP27]] to <4 x i8> +; SSE-NEXT: [[TMP10:%.*]] = bitcast i8* [[B_ADDR_0351]] to <4 x i8>* +; SSE-NEXT: [[TMP11:%.*]] = load <4 x i8>, <4 x i8>* [[TMP10]], align 1 +; SSE-NEXT: [[TMP12:%.*]] = icmp ult <4 x i8> [[TMP5]], [[TMP7]] +; SSE-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i8> [[TMP11]], <4 x i8> [[TMP9]] +; SSE-NEXT: [[TMP14:%.*]] = zext <4 x i8> [[TMP13]] to <4 x i32> +; SSE-NEXT: [[TMP15:%.*]] = mul <4 x i32> [[TMP14]], [[SHUFFLE]] +; SSE-NEXT: [[TMP16:%.*]] = trunc <4 x i32> [[TMP15]] to <4 x i8> ; SSE-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 3 -; SSE-NEXT: [[TMP29:%.*]] = bitcast i8* [[E_ADDR_0354]] to <4 x i8>* -; SSE-NEXT: store <4 x i8> [[TMP28]], <4 x i8>* [[TMP29]], align 1 +; SSE-NEXT: [[TMP17:%.*]] = bitcast i8* [[E_ADDR_0354]] to <4 x i8>* +; SSE-NEXT: store <4 x i8> [[TMP16]], <4 x i8>* [[TMP17]], align 1 ; SSE-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 4 ; SSE-NEXT: [[ARRAYIDX47:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 4 ; SSE-NEXT: [[ARRAYIDX49:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 4 @@ -94,25 +86,25 @@ ; SSE-NEXT: [[ARRAYIDX76:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 6 ; SSE-NEXT: [[ARRAYIDX80:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 6 ; SSE-NEXT: [[ARRAYIDX81:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 7 -; SSE-NEXT: [[TMP30:%.*]] = bitcast i8* [[ARRAYIDX45]] to <4 x i8>* -; SSE-NEXT: [[TMP31:%.*]] = load <4 x i8>, <4 x i8>* [[TMP30]], align 1 +; SSE-NEXT: [[TMP18:%.*]] = bitcast i8* [[ARRAYIDX45]] to <4 x i8>* +; SSE-NEXT: [[TMP19:%.*]] = load <4 x i8>, <4 x i8>* [[TMP18]], align 1 ; SSE-NEXT: [[ARRAYIDX83:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 7 -; SSE-NEXT: [[TMP32:%.*]] = bitcast i8* [[ARRAYIDX47]] to <4 x i8>* -; SSE-NEXT: [[TMP33:%.*]] = load <4 x i8>, <4 x i8>* [[TMP32]], align 1 +; SSE-NEXT: [[TMP20:%.*]] = bitcast i8* [[ARRAYIDX47]] to <4 x i8>* +; SSE-NEXT: [[TMP21:%.*]] = load <4 x i8>, <4 x i8>* [[TMP20]], align 1 ; SSE-NEXT: [[ARRAYIDX85:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 7 -; SSE-NEXT: [[TMP34:%.*]] = bitcast i8* [[ARRAYIDX49]] to <4 x i8>* -; SSE-NEXT: [[TMP35:%.*]] = load <4 x i8>, <4 x i8>* [[TMP34]], align 1 +; SSE-NEXT: [[TMP22:%.*]] = bitcast i8* [[ARRAYIDX49]] to <4 x i8>* +; SSE-NEXT: [[TMP23:%.*]] = load <4 x i8>, <4 x i8>* [[TMP22]], align 1 ; SSE-NEXT: [[ARRAYIDX88:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 7 -; SSE-NEXT: [[TMP36:%.*]] = bitcast i8* [[ARRAYIDX52]] to <4 x i8>* -; SSE-NEXT: [[TMP37:%.*]] = load <4 x i8>, <4 x i8>* [[TMP36]], align 1 -; SSE-NEXT: [[TMP38:%.*]] = icmp ult <4 x i8> [[TMP31]], [[TMP33]] -; SSE-NEXT: [[TMP39:%.*]] = select <4 x i1> [[TMP38]], <4 x i8> [[TMP37]], <4 x i8> [[TMP35]] -; SSE-NEXT: [[TMP40:%.*]] = zext <4 x i8> [[TMP39]] to <4 x i32> -; SSE-NEXT: [[TMP41:%.*]] = mul <4 x i32> [[TMP40]], [[TMP7]] -; SSE-NEXT: [[TMP42:%.*]] = trunc <4 x i32> [[TMP41]] to <4 x i8> +; SSE-NEXT: [[TMP24:%.*]] = bitcast i8* [[ARRAYIDX52]] to <4 x i8>* +; SSE-NEXT: [[TMP25:%.*]] = load <4 x i8>, <4 x i8>* [[TMP24]], align 1 +; SSE-NEXT: [[TMP26:%.*]] = icmp ult <4 x i8> [[TMP19]], [[TMP21]] +; SSE-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP26]], <4 x i8> [[TMP25]], <4 x i8> [[TMP23]] +; SSE-NEXT: [[TMP28:%.*]] = zext <4 x i8> [[TMP27]] to <4 x i32> +; SSE-NEXT: [[TMP29:%.*]] = mul <4 x i32> [[TMP28]], [[SHUFFLE1]] +; SSE-NEXT: [[TMP30:%.*]] = trunc <4 x i32> [[TMP29]] to <4 x i8> ; SSE-NEXT: [[ARRAYIDX92:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 7 -; SSE-NEXT: [[TMP43:%.*]] = bitcast i8* [[ARRAYIDX56]] to <4 x i8>* -; SSE-NEXT: store <4 x i8> [[TMP42]], <4 x i8>* [[TMP43]], align 1 +; SSE-NEXT: [[TMP31:%.*]] = bitcast i8* [[ARRAYIDX56]] to <4 x i8>* +; SSE-NEXT: store <4 x i8> [[TMP30]], <4 x i8>* [[TMP31]], align 1 ; SSE-NEXT: [[ARRAYIDX93:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 8 ; SSE-NEXT: [[ARRAYIDX95:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 8 ; SSE-NEXT: [[ARRAYIDX97:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 8 @@ -129,25 +121,25 @@ ; SSE-NEXT: [[ARRAYIDX124:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 10 ; SSE-NEXT: [[ARRAYIDX128:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 10 ; SSE-NEXT: [[ARRAYIDX129:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 11 -; SSE-NEXT: [[TMP44:%.*]] = bitcast i8* [[ARRAYIDX93]] to <4 x i8>* -; SSE-NEXT: [[TMP45:%.*]] = load <4 x i8>, <4 x i8>* [[TMP44]], align 1 +; SSE-NEXT: [[TMP32:%.*]] = bitcast i8* [[ARRAYIDX93]] to <4 x i8>* +; SSE-NEXT: [[TMP33:%.*]] = load <4 x i8>, <4 x i8>* [[TMP32]], align 1 ; SSE-NEXT: [[ARRAYIDX131:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 11 -; SSE-NEXT: [[TMP46:%.*]] = bitcast i8* [[ARRAYIDX95]] to <4 x i8>* -; SSE-NEXT: [[TMP47:%.*]] = load <4 x i8>, <4 x i8>* [[TMP46]], align 1 +; SSE-NEXT: [[TMP34:%.*]] = bitcast i8* [[ARRAYIDX95]] to <4 x i8>* +; SSE-NEXT: [[TMP35:%.*]] = load <4 x i8>, <4 x i8>* [[TMP34]], align 1 ; SSE-NEXT: [[ARRAYIDX133:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 11 -; SSE-NEXT: [[TMP48:%.*]] = bitcast i8* [[ARRAYIDX97]] to <4 x i8>* -; SSE-NEXT: [[TMP49:%.*]] = load <4 x i8>, <4 x i8>* [[TMP48]], align 1 +; SSE-NEXT: [[TMP36:%.*]] = bitcast i8* [[ARRAYIDX97]] to <4 x i8>* +; SSE-NEXT: [[TMP37:%.*]] = load <4 x i8>, <4 x i8>* [[TMP36]], align 1 ; SSE-NEXT: [[ARRAYIDX136:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 11 -; SSE-NEXT: [[TMP50:%.*]] = bitcast i8* [[ARRAYIDX100]] to <4 x i8>* -; SSE-NEXT: [[TMP51:%.*]] = load <4 x i8>, <4 x i8>* [[TMP50]], align 1 -; SSE-NEXT: [[TMP52:%.*]] = icmp ult <4 x i8> [[TMP45]], [[TMP47]] -; SSE-NEXT: [[TMP53:%.*]] = select <4 x i1> [[TMP52]], <4 x i8> [[TMP51]], <4 x i8> [[TMP49]] -; SSE-NEXT: [[TMP54:%.*]] = zext <4 x i8> [[TMP53]] to <4 x i32> -; SSE-NEXT: [[TMP55:%.*]] = mul <4 x i32> [[TMP54]], [[TMP11]] -; SSE-NEXT: [[TMP56:%.*]] = trunc <4 x i32> [[TMP55]] to <4 x i8> +; SSE-NEXT: [[TMP38:%.*]] = bitcast i8* [[ARRAYIDX100]] to <4 x i8>* +; SSE-NEXT: [[TMP39:%.*]] = load <4 x i8>, <4 x i8>* [[TMP38]], align 1 +; SSE-NEXT: [[TMP40:%.*]] = icmp ult <4 x i8> [[TMP33]], [[TMP35]] +; SSE-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP40]], <4 x i8> [[TMP39]], <4 x i8> [[TMP37]] +; SSE-NEXT: [[TMP42:%.*]] = zext <4 x i8> [[TMP41]] to <4 x i32> +; SSE-NEXT: [[TMP43:%.*]] = mul <4 x i32> [[TMP42]], [[SHUFFLE2]] +; SSE-NEXT: [[TMP44:%.*]] = trunc <4 x i32> [[TMP43]] to <4 x i8> ; SSE-NEXT: [[ARRAYIDX140:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 11 -; SSE-NEXT: [[TMP57:%.*]] = bitcast i8* [[ARRAYIDX104]] to <4 x i8>* -; SSE-NEXT: store <4 x i8> [[TMP56]], <4 x i8>* [[TMP57]], align 1 +; SSE-NEXT: [[TMP45:%.*]] = bitcast i8* [[ARRAYIDX104]] to <4 x i8>* +; SSE-NEXT: store <4 x i8> [[TMP44]], <4 x i8>* [[TMP45]], align 1 ; SSE-NEXT: [[ARRAYIDX141:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 12 ; SSE-NEXT: [[ARRAYIDX143:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 12 ; SSE-NEXT: [[ARRAYIDX145:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 12 @@ -164,25 +156,25 @@ ; SSE-NEXT: [[ARRAYIDX172:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 14 ; SSE-NEXT: [[ARRAYIDX176:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 14 ; SSE-NEXT: [[ARRAYIDX177:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 15 -; SSE-NEXT: [[TMP58:%.*]] = bitcast i8* [[ARRAYIDX141]] to <4 x i8>* -; SSE-NEXT: [[TMP59:%.*]] = load <4 x i8>, <4 x i8>* [[TMP58]], align 1 +; SSE-NEXT: [[TMP46:%.*]] = bitcast i8* [[ARRAYIDX141]] to <4 x i8>* +; SSE-NEXT: [[TMP47:%.*]] = load <4 x i8>, <4 x i8>* [[TMP46]], align 1 ; SSE-NEXT: [[ARRAYIDX179:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 15 -; SSE-NEXT: [[TMP60:%.*]] = bitcast i8* [[ARRAYIDX143]] to <4 x i8>* -; SSE-NEXT: [[TMP61:%.*]] = load <4 x i8>, <4 x i8>* [[TMP60]], align 1 +; SSE-NEXT: [[TMP48:%.*]] = bitcast i8* [[ARRAYIDX143]] to <4 x i8>* +; SSE-NEXT: [[TMP49:%.*]] = load <4 x i8>, <4 x i8>* [[TMP48]], align 1 ; SSE-NEXT: [[ARRAYIDX181:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 15 -; SSE-NEXT: [[TMP62:%.*]] = bitcast i8* [[ARRAYIDX145]] to <4 x i8>* -; SSE-NEXT: [[TMP63:%.*]] = load <4 x i8>, <4 x i8>* [[TMP62]], align 1 +; SSE-NEXT: [[TMP50:%.*]] = bitcast i8* [[ARRAYIDX145]] to <4 x i8>* +; SSE-NEXT: [[TMP51:%.*]] = load <4 x i8>, <4 x i8>* [[TMP50]], align 1 ; SSE-NEXT: [[ARRAYIDX184:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 15 -; SSE-NEXT: [[TMP64:%.*]] = bitcast i8* [[ARRAYIDX148]] to <4 x i8>* -; SSE-NEXT: [[TMP65:%.*]] = load <4 x i8>, <4 x i8>* [[TMP64]], align 1 -; SSE-NEXT: [[TMP66:%.*]] = icmp ult <4 x i8> [[TMP59]], [[TMP61]] -; SSE-NEXT: [[TMP67:%.*]] = select <4 x i1> [[TMP66]], <4 x i8> [[TMP65]], <4 x i8> [[TMP63]] -; SSE-NEXT: [[TMP68:%.*]] = zext <4 x i8> [[TMP67]] to <4 x i32> -; SSE-NEXT: [[TMP69:%.*]] = mul <4 x i32> [[TMP68]], [[TMP15]] -; SSE-NEXT: [[TMP70:%.*]] = trunc <4 x i32> [[TMP69]] to <4 x i8> +; SSE-NEXT: [[TMP52:%.*]] = bitcast i8* [[ARRAYIDX148]] to <4 x i8>* +; SSE-NEXT: [[TMP53:%.*]] = load <4 x i8>, <4 x i8>* [[TMP52]], align 1 +; SSE-NEXT: [[TMP54:%.*]] = icmp ult <4 x i8> [[TMP47]], [[TMP49]] +; SSE-NEXT: [[TMP55:%.*]] = select <4 x i1> [[TMP54]], <4 x i8> [[TMP53]], <4 x i8> [[TMP51]] +; SSE-NEXT: [[TMP56:%.*]] = zext <4 x i8> [[TMP55]] to <4 x i32> +; SSE-NEXT: [[TMP57:%.*]] = mul <4 x i32> [[TMP56]], [[SHUFFLE3]] +; SSE-NEXT: [[TMP58:%.*]] = trunc <4 x i32> [[TMP57]] to <4 x i8> ; SSE-NEXT: [[ARRAYIDX188:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 15 -; SSE-NEXT: [[TMP71:%.*]] = bitcast i8* [[ARRAYIDX152]] to <4 x i8>* -; SSE-NEXT: store <4 x i8> [[TMP70]], <4 x i8>* [[TMP71]], align 1 +; SSE-NEXT: [[TMP59:%.*]] = bitcast i8* [[ARRAYIDX152]] to <4 x i8>* +; SSE-NEXT: store <4 x i8> [[TMP58]], <4 x i8>* [[TMP59]], align 1 ; SSE-NEXT: [[INC]] = add nuw nsw i32 [[I_0356]], 1 ; SSE-NEXT: [[ADD_PTR]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 16 ; SSE-NEXT: [[ADD_PTR189]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 16 @@ -197,21 +189,7 @@ ; AVX512-LABEL: @bar( ; AVX512-NEXT: entry: ; AVX512-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[W:%.*]], i32 0 -; AVX512-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[W]], i32 1 -; AVX512-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[W]], i32 2 -; AVX512-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[W]], i32 3 -; AVX512-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[W]], i32 4 -; AVX512-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[W]], i32 5 -; AVX512-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[W]], i32 6 -; AVX512-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[W]], i32 7 -; AVX512-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[W]], i32 8 -; AVX512-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[W]], i32 9 -; AVX512-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[W]], i32 10 -; AVX512-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[W]], i32 11 -; AVX512-NEXT: [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 [[W]], i32 12 -; AVX512-NEXT: [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 [[W]], i32 13 -; AVX512-NEXT: [[TMP14:%.*]] = insertelement <16 x i32> [[TMP13]], i32 [[W]], i32 14 -; AVX512-NEXT: [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 [[W]], i32 15 +; AVX512-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <16 x i32> zeroinitializer ; AVX512-NEXT: br label [[FOR_BODY:%.*]] ; AVX512: for.body: ; AVX512-NEXT: [[I_0356:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] @@ -291,25 +269,25 @@ ; AVX512-NEXT: [[ARRAYIDX172:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 14 ; AVX512-NEXT: [[ARRAYIDX176:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 14 ; AVX512-NEXT: [[ARRAYIDX177:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 15 -; AVX512-NEXT: [[TMP16:%.*]] = bitcast i8* [[C_ADDR_0352]] to <16 x i8>* -; AVX512-NEXT: [[TMP17:%.*]] = load <16 x i8>, <16 x i8>* [[TMP16]], align 1 +; AVX512-NEXT: [[TMP1:%.*]] = bitcast i8* [[C_ADDR_0352]] to <16 x i8>* +; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 ; AVX512-NEXT: [[ARRAYIDX179:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 15 -; AVX512-NEXT: [[TMP18:%.*]] = bitcast i8* [[D_ADDR_0353]] to <16 x i8>* -; AVX512-NEXT: [[TMP19:%.*]] = load <16 x i8>, <16 x i8>* [[TMP18]], align 1 +; AVX512-NEXT: [[TMP3:%.*]] = bitcast i8* [[D_ADDR_0353]] to <16 x i8>* +; AVX512-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[TMP3]], align 1 ; AVX512-NEXT: [[ARRAYIDX181:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 15 -; AVX512-NEXT: [[TMP20:%.*]] = bitcast i8* [[A_ADDR_0355]] to <16 x i8>* -; AVX512-NEXT: [[TMP21:%.*]] = load <16 x i8>, <16 x i8>* [[TMP20]], align 1 +; AVX512-NEXT: [[TMP5:%.*]] = bitcast i8* [[A_ADDR_0355]] to <16 x i8>* +; AVX512-NEXT: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[TMP5]], align 1 ; AVX512-NEXT: [[ARRAYIDX184:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 15 -; AVX512-NEXT: [[TMP22:%.*]] = bitcast i8* [[B_ADDR_0351]] to <16 x i8>* -; AVX512-NEXT: [[TMP23:%.*]] = load <16 x i8>, <16 x i8>* [[TMP22]], align 1 -; AVX512-NEXT: [[TMP24:%.*]] = icmp ult <16 x i8> [[TMP17]], [[TMP19]] -; AVX512-NEXT: [[TMP25:%.*]] = select <16 x i1> [[TMP24]], <16 x i8> [[TMP23]], <16 x i8> [[TMP21]] -; AVX512-NEXT: [[TMP26:%.*]] = zext <16 x i8> [[TMP25]] to <16 x i32> -; AVX512-NEXT: [[TMP27:%.*]] = mul <16 x i32> [[TMP26]], [[TMP15]] -; AVX512-NEXT: [[TMP28:%.*]] = trunc <16 x i32> [[TMP27]] to <16 x i8> +; AVX512-NEXT: [[TMP7:%.*]] = bitcast i8* [[B_ADDR_0351]] to <16 x i8>* +; AVX512-NEXT: [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* [[TMP7]], align 1 +; AVX512-NEXT: [[TMP9:%.*]] = icmp ult <16 x i8> [[TMP2]], [[TMP4]] +; AVX512-NEXT: [[TMP10:%.*]] = select <16 x i1> [[TMP9]], <16 x i8> [[TMP8]], <16 x i8> [[TMP6]] +; AVX512-NEXT: [[TMP11:%.*]] = zext <16 x i8> [[TMP10]] to <16 x i32> +; AVX512-NEXT: [[TMP12:%.*]] = mul <16 x i32> [[TMP11]], [[SHUFFLE]] +; AVX512-NEXT: [[TMP13:%.*]] = trunc <16 x i32> [[TMP12]] to <16 x i8> ; AVX512-NEXT: [[ARRAYIDX188:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 15 -; AVX512-NEXT: [[TMP29:%.*]] = bitcast i8* [[E_ADDR_0354]] to <16 x i8>* -; AVX512-NEXT: store <16 x i8> [[TMP28]], <16 x i8>* [[TMP29]], align 1 +; AVX512-NEXT: [[TMP14:%.*]] = bitcast i8* [[E_ADDR_0354]] to <16 x i8>* +; AVX512-NEXT: store <16 x i8> [[TMP13]], <16 x i8>* [[TMP14]], align 1 ; AVX512-NEXT: [[INC]] = add nuw nsw i32 [[I_0356]], 1 ; AVX512-NEXT: [[ADD_PTR]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 16 ; AVX512-NEXT: [[ADD_PTR189]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 16 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll @@ -52,7 +52,7 @@ ; CHECK-NEXT: ret void ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ poison, [[ENTRY]] ], [ [[TMP26]], [[FOR_INC]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ undef, [[ENTRY]] ], [ [[TMP26]], [[FOR_INC]] ] ; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll @@ -69,22 +69,15 @@ ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP7]], i32 2 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP9]], i32 3 -; CHECK-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[TMP2]], [[TMP10]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0 ; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1 ; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2 ; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: ret i32 undef ; %in.addr = getelementptr inbounds i32, i32* %in, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll @@ -452,17 +452,15 @@ ; CHECK-LABEL: @ChecksExtractScores( ; CHECK-NEXT: [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0 ; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1 -; CHECK-NEXT: [[LOADA0:%.*]] = load double, double* [[IDX0]], align 4 -; CHECK-NEXT: [[LOADA1:%.*]] = load double, double* [[IDX1]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX0]] to <2 x double>* +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4 ; CHECK-NEXT: [[LOADVEC:%.*]] = load <2 x double>, <2 x double>* [[VECPTR1:%.*]], align 4 ; CHECK-NEXT: [[LOADVEC2:%.*]] = load <2 x double>, <2 x double>* [[VECPTR2:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[LOADA0]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[LOADVEC]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[LOADA1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[LOADVEC]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[LOADVEC2]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP3]], [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP4]], [[TMP6]] ; CHECK-NEXT: [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0 ; CHECK-NEXT: [[SIDX1:%.*]] = getelementptr inbounds double, double* [[STOREARRAY]], i64 1 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[SIDX0]] to <2 x double>* @@ -602,19 +600,16 @@ ; CHECK-NEXT: [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[EXTRB0]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[EXTRA1]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP4]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP10]], double [[EXTRB1]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], [[TMP2]] -; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], [[TMP9]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[EXTRB1]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP8]], [[TMP2]] +; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], [[TMP6]] ; CHECK-NEXT: [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0 ; CHECK-NEXT: [[SIDX1:%.*]] = getelementptr inbounds double, double* [[STOREARRAY]], i64 1 -; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[SIDX0]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP13]], <2 x double>* [[TMP14]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast double* [[SIDX0]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8 ; CHECK-NEXT: ret void ; %idx0 = getelementptr inbounds double, double* %array, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll b/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll @@ -29,19 +29,13 @@ ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>* ; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, <8 x i32>* [[TMP7]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> poison, i32 [[N]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[N]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[N]], i32 2 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[N]], i32 3 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x i32> [[TMP12]], i32 [[N]], i32 4 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x i32> [[TMP13]], i32 [[N]], i32 5 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i32> [[TMP14]], i32 [[N]], i32 6 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i32> [[TMP15]], i32 [[N]], i32 7 -; CHECK-NEXT: [[TMP17:%.*]] = add nsw <8 x i32> [[TMP8]], [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>* -; CHECK-NEXT: store <8 x i32> [[TMP17]], <8 x i32>* [[TMP18]], align 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <8 x i32> [[TMP8]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>* +; CHECK-NEXT: store <8 x i32> [[TMP10]], <8 x i32>* [[TMP11]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 8 -; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP19]], [[N]] +; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP12]], [[N]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 undef diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll @@ -17,23 +17,17 @@ ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[SUB86_1]], i32 4 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[ADD78_2]], i32 5 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> poison, i32 [[SUB86_1]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD78_1]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[ADD94_1]], i32 2 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_1]], i32 3 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[ADD78_2]], i32 4 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[SUB102_3]], i32 5 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP13:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i32> [[TMP12]], <16 x i32> [[TMP13]], <16 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = lshr <16 x i32> [[TMP14]], -; CHECK-NEXT: [[TMP16:%.*]] = and <16 x i32> [[TMP15]], -; CHECK-NEXT: [[TMP17:%.*]] = mul nuw <16 x i32> [[TMP16]], -; CHECK-NEXT: [[TMP18:%.*]] = add <16 x i32> [[TMP17]], [[TMP14]] -; CHECK-NEXT: [[TMP19:%.*]] = xor <16 x i32> [[TMP18]], [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP19]]) -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP20]], 16 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[SHUFFLE]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> [[TMP7]], <16 x i32> [[TMP8]], <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = lshr <16 x i32> [[TMP9]], +; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i32> [[TMP10]], +; CHECK-NEXT: [[TMP12:%.*]] = mul nuw <16 x i32> [[TMP11]], +; CHECK-NEXT: [[TMP13:%.*]] = add <16 x i32> [[TMP12]], [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP13]], [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP14]]) +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP15]], 16 ; CHECK-NEXT: [[ADD119:%.*]] = add nuw nsw i32 undef, [[SHR]] ; CHECK-NEXT: [[SHR120:%.*]] = lshr i32 [[ADD119]], 1 ; CHECK-NEXT: ret i32 [[SHR120]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll @@ -16,20 +16,18 @@ ; CHECK-LABEL: @foo( ; CHECK-NEXT: [[TMP1:%.*]] = mul nsw i32 [[N:%.*]], 5 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP1]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP1]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* -; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[A]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP1]], 11 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 -; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = add nsw i32 [[TMP11]], [[TMP13]] -; CHECK-NEXT: store i32 [[TMP14]], i32* [[TMP12]], align 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* +; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[A]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add nsw i32 [[TMP1]], 11 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP8]], [[TMP10]] +; CHECK-NEXT: store i32 [[TMP11]], i32* [[TMP9]], align 4 ; CHECK-NEXT: ret i32 undef ; %1 = mul nsw i32 %n, 5 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll @@ -142,16 +142,15 @@ ; CHECK-NEXT: br label [[LP:%.*]] ; CHECK: lp: ; CHECK-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[FROM_1:%.*]] = getelementptr double, double* [[FROM:%.*]], i32 1 -; CHECK-NEXT: [[V0_1:%.*]] = load double, double* [[FROM]], align 4 -; CHECK-NEXT: [[V0_2:%.*]] = load double, double* [[FROM_1]], align 4 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V0_2]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V0_1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>* +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[P]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 4 ; CHECK-NEXT: br i1 undef, label [[LP]], label [[EXT:%.*]] ; CHECK: ext: ; CHECK-NEXT: ret void @@ -183,11 +182,12 @@ ; CHECK-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[P]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4 +; CHECK-NEXT: store <2 x double> [[SHUFFLE]], <2 x double>* [[TMP5]], align 4 ; CHECK-NEXT: br i1 undef, label [[LP]], label [[EXT:%.*]] ; CHECK: ext: ; CHECK-NEXT: ret void @@ -218,16 +218,15 @@ ; CHECK-NEXT: br label [[LP:%.*]] ; CHECK: lp: ; CHECK-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[FROM_1:%.*]] = getelementptr double, double* [[FROM:%.*]], i32 1 -; CHECK-NEXT: [[V0_1:%.*]] = load double, double* [[FROM]], align 4 -; CHECK-NEXT: [[V0_2:%.*]] = load double, double* [[FROM_1]], align 4 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V0_1]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V0_2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[P]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>* +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[P]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 4 ; CHECK-NEXT: br i1 undef, label [[LP]], label [[EXT:%.*]] ; CHECK: ext: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll @@ -17,23 +17,24 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[SUB14]], i32 1 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP0]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], poison -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP3]], <4 x i32> poison -; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i32> [[TMP5]] to <4 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 -; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP7]], i32 2 -; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP15]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[SHUFFLE]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], undef +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP4]], <4 x i32> undef +; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i32> [[TMP6]] to <4 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 +; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2 +; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 +; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP16]] ; CHECK-NEXT: unreachable ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll @@ -244,27 +244,20 @@ ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP5]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP9]] = fmul <4 x float> [[TMP8]], +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP2:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2]] = fmul <4 x float> [[TMP1]], ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], 128 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP9]], i32 1 -; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[TMP10]], [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i32 2 -; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP9]], i32 3 -; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP13]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 +; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 +; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 +; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP6]] ; CHECK-NEXT: ret float [[ADD31]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll @@ -32,10 +32,10 @@ ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[V5]], i32 5 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i16> [[TMP5]], i16 [[V6]], i32 6 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[V7]], i32 7 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i16> [[TMP7]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[P0]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP8]], <8 x i16>* [[TMP9]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i16> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* [[TMP10]], align 2 ; CHECK-NEXT: ret void ; ; YAML: Pass: slp-vectorizer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -20,43 +20,35 @@ ; CHECK-NEXT: [[ARRAYIDX_I_I7_6_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 6 ; CHECK-NEXT: [[ARRAYIDX_I_I7_7_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 7 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV31_I]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[CONV31_I]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[CONV31_I]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[CONV31_I]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[CONV31_I]], i32 4 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[CONV31_I]], i32 5 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[CONV31_I]], i32 6 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[CONV31_I]], i32 7 -; CHECK-NEXT: [[TMP9:%.*]] = lshr <8 x i32> [[TMP8]], +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[ARRAYIDX_I_I7_8_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 8 ; CHECK-NEXT: [[ARRAYIDX_I_I7_9_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 9 ; CHECK-NEXT: [[ARRAYIDX_I_I7_10_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 10 ; CHECK-NEXT: [[ARRAYIDX_I_I7_11_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 11 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[CONV31_I]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[CONV31_I]], i32 2 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[CONV31_I]], i32 3 -; CHECK-NEXT: [[TMP14:%.*]] = lshr <4 x i32> [[TMP13]], +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[SHUFFLE1]], ; CHECK-NEXT: [[ARRAYIDX_I_I7_12_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 12 ; CHECK-NEXT: [[SHR_12_I_I:%.*]] = lshr i32 [[CONV31_I]], 13 ; CHECK-NEXT: [[ARRAYIDX_I_I7_13_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 13 ; CHECK-NEXT: [[ARRAYIDX_I_I7_14_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 14 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[CONV31_I]], i32 0 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[CONV31_I]], i32 1 -; CHECK-NEXT: [[TMP17:%.*]] = lshr <2 x i32> [[TMP16]], -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0 -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <16 x i32> [[TMP18]], <16 x i32> [[TMP19]], <16 x i32> -; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <16 x i32> [[TMP20]], <16 x i32> [[TMP21]], <16 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <16 x i32> [[TMP22]], i32 [[SHR_12_I_I]], i32 13 -; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x i32> [[TMP17]], <2 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <16 x i32> [[TMP23]], <16 x i32> [[TMP24]], <16 x i32> -; CHECK-NEXT: [[TMP26:%.*]] = trunc <16 x i32> [[TMP25]] to <16 x i8> -; CHECK-NEXT: [[TMP27:%.*]] = and <16 x i8> [[TMP26]], +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[CONV31_I]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[CONV31_I]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i32> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i32> [[TMP8]], <16 x i32> [[TMP9]], <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 [[SHR_12_I_I]], i32 13 +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <16 x i32> [[TMP13]], <16 x i32> [[TMP14]], <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = trunc <16 x i32> [[TMP15]] to <16 x i8> +; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP16]], ; CHECK-NEXT: [[ARRAYIDX_I_I7_15_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 15 -; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* -; CHECK-NEXT: store <16 x i8> [[TMP27]], <16 x i8>* [[TMP28]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* +; CHECK-NEXT: store <16 x i8> [[TMP17]], <16 x i8>* [[TMP18]], align 1 ; CHECK-NEXT: unreachable ; CHECK: if.end50.i: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll b/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll @@ -12,15 +12,13 @@ ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>* ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[A]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[A]], i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* -; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[TMP9]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* [[TMP13]], align 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <4 x i32> [[TMP4]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP10]], align 4 ; CHECK-NEXT: ret void ; %1 = getelementptr inbounds i32, i32* %x, i64 %i diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll @@ -15,9 +15,9 @@ ; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 4 ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 5 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], poison -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> poison, <4 x i32> [[SHUFFLE1]] -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> poison, <4 x i32> zeroinitializer, <4 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], undef +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> undef, <4 x i32> [[SHUFFLE1]] +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> [[TMP4]] ; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 6 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP27]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll @@ -226,11 +226,9 @@ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP0]], i64 2 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP0]], i64 3 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP1]], i32 2 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP1]], i32 3 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP3]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP10]], <4 x float>* [[TMP11]], align 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP3]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[SHUFFLE]], <4 x float>* [[TMP8]], align 4 ; CHECK-NEXT: ret void ; %3 = getelementptr inbounds float, float* %0, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll @@ -44,212 +44,210 @@ ; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], -183 ; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> poison, i32 [[TMP30]], i32 0 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP30]], i32 1 -; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP30]], i32 2 -; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP30]], i32 3 -; CHECK-NEXT: [[TMP35:%.*]] = sub <4 x i32> [[TMP34]], [[TMP0]] -; CHECK-NEXT: [[TMP36:%.*]] = icmp slt <4 x i32> [[TMP35]], zeroinitializer -; CHECK-NEXT: [[TMP37:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP35]] -; CHECK-NEXT: [[TMP38:%.*]] = select <4 x i1> [[TMP36]], <4 x i32> [[TMP37]], <4 x i32> [[TMP35]] -; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP38]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = icmp slt i32 [[TMP39]], [[B_0]] -; CHECK-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP39]], i32 [[B_0]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP31]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP32:%.*]] = sub <4 x i32> [[SHUFFLE]], [[TMP0]] +; CHECK-NEXT: [[TMP33:%.*]] = icmp slt <4 x i32> [[TMP32]], zeroinitializer +; CHECK-NEXT: [[TMP34:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP32]] +; CHECK-NEXT: [[TMP35:%.*]] = select <4 x i1> [[TMP33]], <4 x i32> [[TMP34]], <4 x i32> [[TMP32]] +; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP35]]) +; CHECK-NEXT: [[OP_EXTRA:%.*]] = icmp slt i32 [[TMP36]], [[B_0]] +; CHECK-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP36]], i32 [[B_0]] ; CHECK-NEXT: [[SUB_116:%.*]] = sub i32 [[TMP30]], [[TMP1]] -; CHECK-NEXT: [[TMP40:%.*]] = icmp slt i32 [[SUB_116]], 0 +; CHECK-NEXT: [[TMP37:%.*]] = icmp slt i32 [[SUB_116]], 0 ; CHECK-NEXT: [[NEG_117:%.*]] = sub nsw i32 0, [[SUB_116]] -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], i32 [[NEG_117]], i32 [[SUB_116]] -; CHECK-NEXT: [[CMP12_118:%.*]] = icmp slt i32 [[TMP41]], [[OP_EXTRA1]] -; CHECK-NEXT: [[SPEC_SELECT8_120:%.*]] = select i1 [[CMP12_118]], i32 [[TMP41]], i32 [[OP_EXTRA1]] +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[NEG_117]], i32 [[SUB_116]] +; CHECK-NEXT: [[CMP12_118:%.*]] = icmp slt i32 [[TMP38]], [[OP_EXTRA1]] +; CHECK-NEXT: [[SPEC_SELECT8_120:%.*]] = select i1 [[CMP12_118]], i32 [[TMP38]], i32 [[OP_EXTRA1]] ; CHECK-NEXT: [[SUB_1_1:%.*]] = sub i32 [[TMP30]], [[TMP2]] -; CHECK-NEXT: [[TMP42:%.*]] = icmp slt i32 [[SUB_1_1]], 0 +; CHECK-NEXT: [[TMP39:%.*]] = icmp slt i32 [[SUB_1_1]], 0 ; CHECK-NEXT: [[NEG_1_1:%.*]] = sub nsw i32 0, [[SUB_1_1]] -; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[NEG_1_1]], i32 [[SUB_1_1]] -; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP43]], [[SPEC_SELECT8_120]] +; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[NEG_1_1]], i32 [[SUB_1_1]] +; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP40]], [[SPEC_SELECT8_120]] ; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CMP12_1_1]], [[CMP12_118]] -; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP43]], i32 [[SPEC_SELECT8_120]] +; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP40]], i32 [[SPEC_SELECT8_120]] ; CHECK-NEXT: [[SUB_2_1:%.*]] = sub i32 [[TMP30]], [[TMP3]] -; CHECK-NEXT: [[TMP44:%.*]] = icmp slt i32 [[SUB_2_1]], 0 +; CHECK-NEXT: [[TMP41:%.*]] = icmp slt i32 [[SUB_2_1]], 0 ; CHECK-NEXT: [[NEG_2_1:%.*]] = sub nsw i32 0, [[SUB_2_1]] -; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[NEG_2_1]], i32 [[SUB_2_1]] -; CHECK-NEXT: [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP45]], [[SPEC_SELECT8_1_1]] +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[NEG_2_1]], i32 [[SUB_2_1]] +; CHECK-NEXT: [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP42]], [[SPEC_SELECT8_1_1]] ; CHECK-NEXT: [[NARROW34:%.*]] = or i1 [[CMP12_2_1]], [[NARROW]] -; CHECK-NEXT: [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP45]], i32 [[SPEC_SELECT8_1_1]] +; CHECK-NEXT: [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP42]], i32 [[SPEC_SELECT8_1_1]] ; CHECK-NEXT: [[SUB_3_1:%.*]] = sub i32 [[TMP30]], [[TMP4]] -; CHECK-NEXT: [[TMP46:%.*]] = icmp slt i32 [[SUB_3_1]], 0 +; CHECK-NEXT: [[TMP43:%.*]] = icmp slt i32 [[SUB_3_1]], 0 ; CHECK-NEXT: [[NEG_3_1:%.*]] = sub nsw i32 0, [[SUB_3_1]] -; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], i32 [[NEG_3_1]], i32 [[SUB_3_1]] -; CHECK-NEXT: [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP47]], [[SPEC_SELECT8_2_1]] +; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[NEG_3_1]], i32 [[SUB_3_1]] +; CHECK-NEXT: [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP44]], [[SPEC_SELECT8_2_1]] ; CHECK-NEXT: [[NARROW35:%.*]] = or i1 [[CMP12_3_1]], [[NARROW34]] ; CHECK-NEXT: [[SPEC_SELECT_3_1:%.*]] = zext i1 [[NARROW35]] to i32 -; CHECK-NEXT: [[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP47]], i32 [[SPEC_SELECT8_2_1]] +; CHECK-NEXT: [[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP44]], i32 [[SPEC_SELECT8_2_1]] ; CHECK-NEXT: [[SUB_222:%.*]] = sub i32 [[TMP30]], [[TMP5]] -; CHECK-NEXT: [[TMP48:%.*]] = icmp slt i32 [[SUB_222]], 0 +; CHECK-NEXT: [[TMP45:%.*]] = icmp slt i32 [[SUB_222]], 0 ; CHECK-NEXT: [[NEG_223:%.*]] = sub nsw i32 0, [[SUB_222]] -; CHECK-NEXT: [[TMP49:%.*]] = select i1 [[TMP48]], i32 [[NEG_223]], i32 [[SUB_222]] -; CHECK-NEXT: [[CMP12_224:%.*]] = icmp slt i32 [[TMP49]], [[SPEC_SELECT8_3_1]] -; CHECK-NEXT: [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP49]], i32 [[SPEC_SELECT8_3_1]] +; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[NEG_223]], i32 [[SUB_222]] +; CHECK-NEXT: [[CMP12_224:%.*]] = icmp slt i32 [[TMP46]], [[SPEC_SELECT8_3_1]] +; CHECK-NEXT: [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP46]], i32 [[SPEC_SELECT8_3_1]] ; CHECK-NEXT: [[SUB_1_2:%.*]] = sub i32 [[TMP30]], [[TMP6]] -; CHECK-NEXT: [[TMP50:%.*]] = icmp slt i32 [[SUB_1_2]], 0 +; CHECK-NEXT: [[TMP47:%.*]] = icmp slt i32 [[SUB_1_2]], 0 ; CHECK-NEXT: [[NEG_1_2:%.*]] = sub nsw i32 0, [[SUB_1_2]] -; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[NEG_1_2]], i32 [[SUB_1_2]] -; CHECK-NEXT: [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP51]], [[SPEC_SELECT8_226]] -; CHECK-NEXT: [[TMP52:%.*]] = or i1 [[CMP12_1_2]], [[CMP12_224]] -; CHECK-NEXT: [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP51]], i32 [[SPEC_SELECT8_226]] +; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[NEG_1_2]], i32 [[SUB_1_2]] +; CHECK-NEXT: [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP48]], [[SPEC_SELECT8_226]] +; CHECK-NEXT: [[TMP49:%.*]] = or i1 [[CMP12_1_2]], [[CMP12_224]] +; CHECK-NEXT: [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP48]], i32 [[SPEC_SELECT8_226]] ; CHECK-NEXT: [[SUB_2_2:%.*]] = sub i32 [[TMP30]], [[TMP7]] -; CHECK-NEXT: [[TMP53:%.*]] = icmp slt i32 [[SUB_2_2]], 0 +; CHECK-NEXT: [[TMP50:%.*]] = icmp slt i32 [[SUB_2_2]], 0 ; CHECK-NEXT: [[NEG_2_2:%.*]] = sub nsw i32 0, [[SUB_2_2]] -; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[NEG_2_2]], i32 [[SUB_2_2]] -; CHECK-NEXT: [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP54]], [[SPEC_SELECT8_1_2]] -; CHECK-NEXT: [[TMP55:%.*]] = or i1 [[CMP12_2_2]], [[TMP52]] -; CHECK-NEXT: [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP54]], i32 [[SPEC_SELECT8_1_2]] +; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[NEG_2_2]], i32 [[SUB_2_2]] +; CHECK-NEXT: [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP51]], [[SPEC_SELECT8_1_2]] +; CHECK-NEXT: [[TMP52:%.*]] = or i1 [[CMP12_2_2]], [[TMP49]] +; CHECK-NEXT: [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP51]], i32 [[SPEC_SELECT8_1_2]] ; CHECK-NEXT: [[SUB_3_2:%.*]] = sub i32 [[TMP30]], [[TMP8]] -; CHECK-NEXT: [[TMP56:%.*]] = icmp slt i32 [[SUB_3_2]], 0 +; CHECK-NEXT: [[TMP53:%.*]] = icmp slt i32 [[SUB_3_2]], 0 ; CHECK-NEXT: [[NEG_3_2:%.*]] = sub nsw i32 0, [[SUB_3_2]] -; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[NEG_3_2]], i32 [[SUB_3_2]] -; CHECK-NEXT: [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP57]], [[SPEC_SELECT8_2_2]] -; CHECK-NEXT: [[TMP58:%.*]] = or i1 [[CMP12_3_2]], [[TMP55]] -; CHECK-NEXT: [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP58]], i32 2, i32 [[SPEC_SELECT_3_1]] -; CHECK-NEXT: [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP57]], i32 [[SPEC_SELECT8_2_2]] +; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[NEG_3_2]], i32 [[SUB_3_2]] +; CHECK-NEXT: [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP54]], [[SPEC_SELECT8_2_2]] +; CHECK-NEXT: [[TMP55:%.*]] = or i1 [[CMP12_3_2]], [[TMP52]] +; CHECK-NEXT: [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP55]], i32 2, i32 [[SPEC_SELECT_3_1]] +; CHECK-NEXT: [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP54]], i32 [[SPEC_SELECT8_2_2]] ; CHECK-NEXT: [[SUB_328:%.*]] = sub i32 [[TMP30]], [[TMP9]] -; CHECK-NEXT: [[TMP59:%.*]] = icmp slt i32 [[SUB_328]], 0 +; CHECK-NEXT: [[TMP56:%.*]] = icmp slt i32 [[SUB_328]], 0 ; CHECK-NEXT: [[NEG_329:%.*]] = sub nsw i32 0, [[SUB_328]] -; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[NEG_329]], i32 [[SUB_328]] -; CHECK-NEXT: [[CMP12_330:%.*]] = icmp slt i32 [[TMP60]], [[SPEC_SELECT8_3_2]] -; CHECK-NEXT: [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP60]], i32 [[SPEC_SELECT8_3_2]] +; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[NEG_329]], i32 [[SUB_328]] +; CHECK-NEXT: [[CMP12_330:%.*]] = icmp slt i32 [[TMP57]], [[SPEC_SELECT8_3_2]] +; CHECK-NEXT: [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP57]], i32 [[SPEC_SELECT8_3_2]] ; CHECK-NEXT: [[SUB_1_3:%.*]] = sub i32 [[TMP30]], [[TMP10]] -; CHECK-NEXT: [[TMP61:%.*]] = icmp slt i32 [[SUB_1_3]], 0 +; CHECK-NEXT: [[TMP58:%.*]] = icmp slt i32 [[SUB_1_3]], 0 ; CHECK-NEXT: [[NEG_1_3:%.*]] = sub nsw i32 0, [[SUB_1_3]] -; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[NEG_1_3]], i32 [[SUB_1_3]] -; CHECK-NEXT: [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP62]], [[SPEC_SELECT8_332]] -; CHECK-NEXT: [[TMP63:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]] -; CHECK-NEXT: [[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP62]], i32 [[SPEC_SELECT8_332]] +; CHECK-NEXT: [[TMP59:%.*]] = select i1 [[TMP58]], i32 [[NEG_1_3]], i32 [[SUB_1_3]] +; CHECK-NEXT: [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP59]], [[SPEC_SELECT8_332]] +; CHECK-NEXT: [[TMP60:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]] +; CHECK-NEXT: [[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP59]], i32 [[SPEC_SELECT8_332]] ; CHECK-NEXT: [[SUB_2_3:%.*]] = sub i32 [[TMP30]], [[TMP11]] -; CHECK-NEXT: [[TMP64:%.*]] = icmp slt i32 [[SUB_2_3]], 0 +; CHECK-NEXT: [[TMP61:%.*]] = icmp slt i32 [[SUB_2_3]], 0 ; CHECK-NEXT: [[NEG_2_3:%.*]] = sub nsw i32 0, [[SUB_2_3]] -; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP64]], i32 [[NEG_2_3]], i32 [[SUB_2_3]] -; CHECK-NEXT: [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP65]], [[SPEC_SELECT8_1_3]] -; CHECK-NEXT: [[TMP66:%.*]] = or i1 [[CMP12_2_3]], [[TMP63]] -; CHECK-NEXT: [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP65]], i32 [[SPEC_SELECT8_1_3]] +; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[NEG_2_3]], i32 [[SUB_2_3]] +; CHECK-NEXT: [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP62]], [[SPEC_SELECT8_1_3]] +; CHECK-NEXT: [[TMP63:%.*]] = or i1 [[CMP12_2_3]], [[TMP60]] +; CHECK-NEXT: [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP62]], i32 [[SPEC_SELECT8_1_3]] ; CHECK-NEXT: [[SUB_3_3:%.*]] = sub i32 [[TMP30]], [[TMP12]] -; CHECK-NEXT: [[TMP67:%.*]] = icmp slt i32 [[SUB_3_3]], 0 +; CHECK-NEXT: [[TMP64:%.*]] = icmp slt i32 [[SUB_3_3]], 0 ; CHECK-NEXT: [[NEG_3_3:%.*]] = sub nsw i32 0, [[SUB_3_3]] -; CHECK-NEXT: [[TMP68:%.*]] = select i1 [[TMP67]], i32 [[NEG_3_3]], i32 [[SUB_3_3]] -; CHECK-NEXT: [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP68]], [[SPEC_SELECT8_2_3]] -; CHECK-NEXT: [[TMP69:%.*]] = or i1 [[CMP12_3_3]], [[TMP66]] -; CHECK-NEXT: [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP69]], i32 3, i32 [[SPEC_SELECT_3_2]] -; CHECK-NEXT: [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP68]], i32 [[SPEC_SELECT8_2_3]] +; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP64]], i32 [[NEG_3_3]], i32 [[SUB_3_3]] +; CHECK-NEXT: [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP65]], [[SPEC_SELECT8_2_3]] +; CHECK-NEXT: [[TMP66:%.*]] = or i1 [[CMP12_3_3]], [[TMP63]] +; CHECK-NEXT: [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP66]], i32 3, i32 [[SPEC_SELECT_3_2]] +; CHECK-NEXT: [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP65]], i32 [[SPEC_SELECT8_2_3]] ; CHECK-NEXT: [[SUB_4:%.*]] = sub i32 [[TMP30]], [[TMP13]] -; CHECK-NEXT: [[TMP70:%.*]] = icmp slt i32 [[SUB_4]], 0 +; CHECK-NEXT: [[TMP67:%.*]] = icmp slt i32 [[SUB_4]], 0 ; CHECK-NEXT: [[NEG_4:%.*]] = sub nsw i32 0, [[SUB_4]] -; CHECK-NEXT: [[TMP71:%.*]] = select i1 [[TMP70]], i32 [[NEG_4]], i32 [[SUB_4]] -; CHECK-NEXT: [[CMP12_4:%.*]] = icmp slt i32 [[TMP71]], [[SPEC_SELECT8_3_3]] -; CHECK-NEXT: [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP71]], i32 [[SPEC_SELECT8_3_3]] +; CHECK-NEXT: [[TMP68:%.*]] = select i1 [[TMP67]], i32 [[NEG_4]], i32 [[SUB_4]] +; CHECK-NEXT: [[CMP12_4:%.*]] = icmp slt i32 [[TMP68]], [[SPEC_SELECT8_3_3]] +; CHECK-NEXT: [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP68]], i32 [[SPEC_SELECT8_3_3]] ; CHECK-NEXT: [[SUB_1_4:%.*]] = sub i32 [[TMP30]], [[TMP14]] -; CHECK-NEXT: [[TMP72:%.*]] = icmp slt i32 [[SUB_1_4]], 0 +; CHECK-NEXT: [[TMP69:%.*]] = icmp slt i32 [[SUB_1_4]], 0 ; CHECK-NEXT: [[NEG_1_4:%.*]] = sub nsw i32 0, [[SUB_1_4]] -; CHECK-NEXT: [[TMP73:%.*]] = select i1 [[TMP72]], i32 [[NEG_1_4]], i32 [[SUB_1_4]] -; CHECK-NEXT: [[CMP12_1_4:%.*]] = icmp slt i32 [[TMP73]], [[SPEC_SELECT8_4]] -; CHECK-NEXT: [[TMP74:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]] -; CHECK-NEXT: [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP73]], i32 [[SPEC_SELECT8_4]] +; CHECK-NEXT: [[TMP70:%.*]] = select i1 [[TMP69]], i32 [[NEG_1_4]], i32 [[SUB_1_4]] +; CHECK-NEXT: [[CMP12_1_4:%.*]] = icmp slt i32 [[TMP70]], [[SPEC_SELECT8_4]] +; CHECK-NEXT: [[TMP71:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]] +; CHECK-NEXT: [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP70]], i32 [[SPEC_SELECT8_4]] ; CHECK-NEXT: [[SUB_2_4:%.*]] = sub i32 [[TMP30]], [[TMP15]] -; CHECK-NEXT: [[TMP75:%.*]] = icmp slt i32 [[SUB_2_4]], 0 +; CHECK-NEXT: [[TMP72:%.*]] = icmp slt i32 [[SUB_2_4]], 0 ; CHECK-NEXT: [[NEG_2_4:%.*]] = sub nsw i32 0, [[SUB_2_4]] -; CHECK-NEXT: [[TMP76:%.*]] = select i1 [[TMP75]], i32 [[NEG_2_4]], i32 [[SUB_2_4]] -; CHECK-NEXT: [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP76]], [[SPEC_SELECT8_1_4]] -; CHECK-NEXT: [[TMP77:%.*]] = or i1 [[CMP12_2_4]], [[TMP74]] -; CHECK-NEXT: [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP76]], i32 [[SPEC_SELECT8_1_4]] +; CHECK-NEXT: [[TMP73:%.*]] = select i1 [[TMP72]], i32 [[NEG_2_4]], i32 [[SUB_2_4]] +; CHECK-NEXT: [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP73]], [[SPEC_SELECT8_1_4]] +; CHECK-NEXT: [[TMP74:%.*]] = or i1 [[CMP12_2_4]], [[TMP71]] +; CHECK-NEXT: [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP73]], i32 [[SPEC_SELECT8_1_4]] ; CHECK-NEXT: [[SUB_3_4:%.*]] = sub i32 [[TMP30]], [[TMP16]] -; CHECK-NEXT: [[TMP78:%.*]] = icmp slt i32 [[SUB_3_4]], 0 +; CHECK-NEXT: [[TMP75:%.*]] = icmp slt i32 [[SUB_3_4]], 0 ; CHECK-NEXT: [[NEG_3_4:%.*]] = sub nsw i32 0, [[SUB_3_4]] -; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 [[NEG_3_4]], i32 [[SUB_3_4]] -; CHECK-NEXT: [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP79]], [[SPEC_SELECT8_2_4]] -; CHECK-NEXT: [[TMP80:%.*]] = or i1 [[CMP12_3_4]], [[TMP77]] -; CHECK-NEXT: [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP80]], i32 4, i32 [[SPEC_SELECT_3_3]] -; CHECK-NEXT: [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP79]], i32 [[SPEC_SELECT8_2_4]] +; CHECK-NEXT: [[TMP76:%.*]] = select i1 [[TMP75]], i32 [[NEG_3_4]], i32 [[SUB_3_4]] +; CHECK-NEXT: [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP76]], [[SPEC_SELECT8_2_4]] +; CHECK-NEXT: [[TMP77:%.*]] = or i1 [[CMP12_3_4]], [[TMP74]] +; CHECK-NEXT: [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP77]], i32 4, i32 [[SPEC_SELECT_3_3]] +; CHECK-NEXT: [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP76]], i32 [[SPEC_SELECT8_2_4]] ; CHECK-NEXT: [[SUB_5:%.*]] = sub i32 [[TMP30]], [[TMP17]] -; CHECK-NEXT: [[TMP81:%.*]] = icmp slt i32 [[SUB_5]], 0 +; CHECK-NEXT: [[TMP78:%.*]] = icmp slt i32 [[SUB_5]], 0 ; CHECK-NEXT: [[NEG_5:%.*]] = sub nsw i32 0, [[SUB_5]] -; CHECK-NEXT: [[TMP82:%.*]] = select i1 [[TMP81]], i32 [[NEG_5]], i32 [[SUB_5]] -; CHECK-NEXT: [[CMP12_5:%.*]] = icmp slt i32 [[TMP82]], [[SPEC_SELECT8_3_4]] -; CHECK-NEXT: [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP82]], i32 [[SPEC_SELECT8_3_4]] +; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 [[NEG_5]], i32 [[SUB_5]] +; CHECK-NEXT: [[CMP12_5:%.*]] = icmp slt i32 [[TMP79]], [[SPEC_SELECT8_3_4]] +; CHECK-NEXT: [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP79]], i32 [[SPEC_SELECT8_3_4]] ; CHECK-NEXT: [[SUB_1_5:%.*]] = sub i32 [[TMP30]], [[TMP18]] -; CHECK-NEXT: [[TMP83:%.*]] = icmp slt i32 [[SUB_1_5]], 0 +; CHECK-NEXT: [[TMP80:%.*]] = icmp slt i32 [[SUB_1_5]], 0 ; CHECK-NEXT: [[NEG_1_5:%.*]] = sub nsw i32 0, [[SUB_1_5]] -; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[NEG_1_5]], i32 [[SUB_1_5]] -; CHECK-NEXT: [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP84]], [[SPEC_SELECT8_5]] -; CHECK-NEXT: [[TMP85:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]] -; CHECK-NEXT: [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP84]], i32 [[SPEC_SELECT8_5]] +; CHECK-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], i32 [[NEG_1_5]], i32 [[SUB_1_5]] +; CHECK-NEXT: [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP81]], [[SPEC_SELECT8_5]] +; CHECK-NEXT: [[TMP82:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]] +; CHECK-NEXT: [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP81]], i32 [[SPEC_SELECT8_5]] ; CHECK-NEXT: [[SUB_2_5:%.*]] = sub i32 [[TMP30]], [[TMP19]] -; CHECK-NEXT: [[TMP86:%.*]] = icmp slt i32 [[SUB_2_5]], 0 +; CHECK-NEXT: [[TMP83:%.*]] = icmp slt i32 [[SUB_2_5]], 0 ; CHECK-NEXT: [[NEG_2_5:%.*]] = sub nsw i32 0, [[SUB_2_5]] -; CHECK-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[NEG_2_5]], i32 [[SUB_2_5]] -; CHECK-NEXT: [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP87]], [[SPEC_SELECT8_1_5]] -; CHECK-NEXT: [[TMP88:%.*]] = or i1 [[CMP12_2_5]], [[TMP85]] -; CHECK-NEXT: [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP87]], i32 [[SPEC_SELECT8_1_5]] +; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[NEG_2_5]], i32 [[SUB_2_5]] +; CHECK-NEXT: [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP84]], [[SPEC_SELECT8_1_5]] +; CHECK-NEXT: [[TMP85:%.*]] = or i1 [[CMP12_2_5]], [[TMP82]] +; CHECK-NEXT: [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP84]], i32 [[SPEC_SELECT8_1_5]] ; CHECK-NEXT: [[SUB_3_5:%.*]] = sub i32 [[TMP30]], [[TMP20]] -; CHECK-NEXT: [[TMP89:%.*]] = icmp slt i32 [[SUB_3_5]], 0 +; CHECK-NEXT: [[TMP86:%.*]] = icmp slt i32 [[SUB_3_5]], 0 ; CHECK-NEXT: [[NEG_3_5:%.*]] = sub nsw i32 0, [[SUB_3_5]] -; CHECK-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[NEG_3_5]], i32 [[SUB_3_5]] -; CHECK-NEXT: [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP90]], [[SPEC_SELECT8_2_5]] -; CHECK-NEXT: [[TMP91:%.*]] = or i1 [[CMP12_3_5]], [[TMP88]] -; CHECK-NEXT: [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP91]], i32 5, i32 [[SPEC_SELECT_3_4]] -; CHECK-NEXT: [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP90]], i32 [[SPEC_SELECT8_2_5]] +; CHECK-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[NEG_3_5]], i32 [[SUB_3_5]] +; CHECK-NEXT: [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP87]], [[SPEC_SELECT8_2_5]] +; CHECK-NEXT: [[TMP88:%.*]] = or i1 [[CMP12_3_5]], [[TMP85]] +; CHECK-NEXT: [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP88]], i32 5, i32 [[SPEC_SELECT_3_4]] +; CHECK-NEXT: [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP87]], i32 [[SPEC_SELECT8_2_5]] ; CHECK-NEXT: [[SUB_6:%.*]] = sub i32 [[TMP30]], [[TMP21]] -; CHECK-NEXT: [[TMP92:%.*]] = icmp slt i32 [[SUB_6]], 0 +; CHECK-NEXT: [[TMP89:%.*]] = icmp slt i32 [[SUB_6]], 0 ; CHECK-NEXT: [[NEG_6:%.*]] = sub nsw i32 0, [[SUB_6]] -; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[NEG_6]], i32 [[SUB_6]] -; CHECK-NEXT: [[CMP12_6:%.*]] = icmp slt i32 [[TMP93]], [[SPEC_SELECT8_3_5]] -; CHECK-NEXT: [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP93]], i32 [[SPEC_SELECT8_3_5]] +; CHECK-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[NEG_6]], i32 [[SUB_6]] +; CHECK-NEXT: [[CMP12_6:%.*]] = icmp slt i32 [[TMP90]], [[SPEC_SELECT8_3_5]] +; CHECK-NEXT: [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP90]], i32 [[SPEC_SELECT8_3_5]] ; CHECK-NEXT: [[SUB_1_6:%.*]] = sub i32 [[TMP30]], [[TMP22]] -; CHECK-NEXT: [[TMP94:%.*]] = icmp slt i32 [[SUB_1_6]], 0 +; CHECK-NEXT: [[TMP91:%.*]] = icmp slt i32 [[SUB_1_6]], 0 ; CHECK-NEXT: [[NEG_1_6:%.*]] = sub nsw i32 0, [[SUB_1_6]] -; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], i32 [[NEG_1_6]], i32 [[SUB_1_6]] -; CHECK-NEXT: [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP95]], [[SPEC_SELECT8_6]] -; CHECK-NEXT: [[TMP96:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]] -; CHECK-NEXT: [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP95]], i32 [[SPEC_SELECT8_6]] +; CHECK-NEXT: [[TMP92:%.*]] = select i1 [[TMP91]], i32 [[NEG_1_6]], i32 [[SUB_1_6]] +; CHECK-NEXT: [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP92]], [[SPEC_SELECT8_6]] +; CHECK-NEXT: [[TMP93:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]] +; CHECK-NEXT: [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP92]], i32 [[SPEC_SELECT8_6]] ; CHECK-NEXT: [[SUB_2_6:%.*]] = sub i32 [[TMP30]], [[TMP23]] -; CHECK-NEXT: [[TMP97:%.*]] = icmp slt i32 [[SUB_2_6]], 0 +; CHECK-NEXT: [[TMP94:%.*]] = icmp slt i32 [[SUB_2_6]], 0 ; CHECK-NEXT: [[NEG_2_6:%.*]] = sub nsw i32 0, [[SUB_2_6]] -; CHECK-NEXT: [[TMP98:%.*]] = select i1 [[TMP97]], i32 [[NEG_2_6]], i32 [[SUB_2_6]] -; CHECK-NEXT: [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP98]], [[SPEC_SELECT8_1_6]] -; CHECK-NEXT: [[TMP99:%.*]] = or i1 [[CMP12_2_6]], [[TMP96]] -; CHECK-NEXT: [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP98]], i32 [[SPEC_SELECT8_1_6]] +; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], i32 [[NEG_2_6]], i32 [[SUB_2_6]] +; CHECK-NEXT: [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP95]], [[SPEC_SELECT8_1_6]] +; CHECK-NEXT: [[TMP96:%.*]] = or i1 [[CMP12_2_6]], [[TMP93]] +; CHECK-NEXT: [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP95]], i32 [[SPEC_SELECT8_1_6]] ; CHECK-NEXT: [[SUB_3_6:%.*]] = sub i32 [[TMP30]], [[TMP24]] -; CHECK-NEXT: [[TMP100:%.*]] = icmp slt i32 [[SUB_3_6]], 0 +; CHECK-NEXT: [[TMP97:%.*]] = icmp slt i32 [[SUB_3_6]], 0 ; CHECK-NEXT: [[NEG_3_6:%.*]] = sub nsw i32 0, [[SUB_3_6]] -; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP100]], i32 [[NEG_3_6]], i32 [[SUB_3_6]] -; CHECK-NEXT: [[CMP12_3_6:%.*]] = icmp slt i32 [[TMP101]], [[SPEC_SELECT8_2_6]] -; CHECK-NEXT: [[TMP102:%.*]] = or i1 [[CMP12_3_6]], [[TMP99]] -; CHECK-NEXT: [[SPEC_SELECT_3_6:%.*]] = select i1 [[TMP102]], i32 6, i32 [[SPEC_SELECT_3_5]] -; CHECK-NEXT: [[SPEC_SELECT8_3_6:%.*]] = select i1 [[CMP12_3_6]], i32 [[TMP101]], i32 [[SPEC_SELECT8_2_6]] +; CHECK-NEXT: [[TMP98:%.*]] = select i1 [[TMP97]], i32 [[NEG_3_6]], i32 [[SUB_3_6]] +; CHECK-NEXT: [[CMP12_3_6:%.*]] = icmp slt i32 [[TMP98]], [[SPEC_SELECT8_2_6]] +; CHECK-NEXT: [[TMP99:%.*]] = or i1 [[CMP12_3_6]], [[TMP96]] +; CHECK-NEXT: [[SPEC_SELECT_3_6:%.*]] = select i1 [[TMP99]], i32 6, i32 [[SPEC_SELECT_3_5]] +; CHECK-NEXT: [[SPEC_SELECT8_3_6:%.*]] = select i1 [[CMP12_3_6]], i32 [[TMP98]], i32 [[SPEC_SELECT8_2_6]] ; CHECK-NEXT: [[SUB_7:%.*]] = sub i32 [[TMP30]], [[TMP25]] -; CHECK-NEXT: [[TMP103:%.*]] = icmp slt i32 [[SUB_7]], 0 +; CHECK-NEXT: [[TMP100:%.*]] = icmp slt i32 [[SUB_7]], 0 ; CHECK-NEXT: [[NEG_7:%.*]] = sub nsw i32 0, [[SUB_7]] -; CHECK-NEXT: [[TMP104:%.*]] = select i1 [[TMP103]], i32 [[NEG_7]], i32 [[SUB_7]] -; CHECK-NEXT: [[CMP12_7:%.*]] = icmp slt i32 [[TMP104]], [[SPEC_SELECT8_3_6]] -; CHECK-NEXT: [[SPEC_SELECT8_7:%.*]] = select i1 [[CMP12_7]], i32 [[TMP104]], i32 [[SPEC_SELECT8_3_6]] +; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP100]], i32 [[NEG_7]], i32 [[SUB_7]] +; CHECK-NEXT: [[CMP12_7:%.*]] = icmp slt i32 [[TMP101]], [[SPEC_SELECT8_3_6]] +; CHECK-NEXT: [[SPEC_SELECT8_7:%.*]] = select i1 [[CMP12_7]], i32 [[TMP101]], i32 [[SPEC_SELECT8_3_6]] ; CHECK-NEXT: [[SUB_1_7:%.*]] = sub i32 [[TMP30]], [[TMP26]] -; CHECK-NEXT: [[TMP105:%.*]] = icmp slt i32 [[SUB_1_7]], 0 +; CHECK-NEXT: [[TMP102:%.*]] = icmp slt i32 [[SUB_1_7]], 0 ; CHECK-NEXT: [[NEG_1_7:%.*]] = sub nsw i32 0, [[SUB_1_7]] -; CHECK-NEXT: [[TMP106:%.*]] = select i1 [[TMP105]], i32 [[NEG_1_7]], i32 [[SUB_1_7]] -; CHECK-NEXT: [[CMP12_1_7:%.*]] = icmp slt i32 [[TMP106]], [[SPEC_SELECT8_7]] -; CHECK-NEXT: [[TMP107:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]] -; CHECK-NEXT: [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP106]], i32 [[SPEC_SELECT8_7]] +; CHECK-NEXT: [[TMP103:%.*]] = select i1 [[TMP102]], i32 [[NEG_1_7]], i32 [[SUB_1_7]] +; CHECK-NEXT: [[CMP12_1_7:%.*]] = icmp slt i32 [[TMP103]], [[SPEC_SELECT8_7]] +; CHECK-NEXT: [[TMP104:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]] +; CHECK-NEXT: [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP103]], i32 [[SPEC_SELECT8_7]] ; CHECK-NEXT: [[SUB_2_7:%.*]] = sub i32 [[TMP30]], [[TMP27]] -; CHECK-NEXT: [[TMP108:%.*]] = icmp slt i32 [[SUB_2_7]], 0 +; CHECK-NEXT: [[TMP105:%.*]] = icmp slt i32 [[SUB_2_7]], 0 ; CHECK-NEXT: [[NEG_2_7:%.*]] = sub nsw i32 0, [[SUB_2_7]] -; CHECK-NEXT: [[TMP109:%.*]] = select i1 [[TMP108]], i32 [[NEG_2_7]], i32 [[SUB_2_7]] -; CHECK-NEXT: [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP109]], [[SPEC_SELECT8_1_7]] -; CHECK-NEXT: [[TMP110:%.*]] = or i1 [[CMP12_2_7]], [[TMP107]] -; CHECK-NEXT: [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP109]], i32 [[SPEC_SELECT8_1_7]] +; CHECK-NEXT: [[TMP106:%.*]] = select i1 [[TMP105]], i32 [[NEG_2_7]], i32 [[SUB_2_7]] +; CHECK-NEXT: [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP106]], [[SPEC_SELECT8_1_7]] +; CHECK-NEXT: [[TMP107:%.*]] = or i1 [[CMP12_2_7]], [[TMP104]] +; CHECK-NEXT: [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP106]], i32 [[SPEC_SELECT8_1_7]] ; CHECK-NEXT: [[SUB_3_7:%.*]] = sub i32 [[TMP30]], [[TMP28]] -; CHECK-NEXT: [[TMP111:%.*]] = icmp slt i32 [[SUB_3_7]], 0 +; CHECK-NEXT: [[TMP108:%.*]] = icmp slt i32 [[SUB_3_7]], 0 ; CHECK-NEXT: [[NEG_3_7:%.*]] = sub nsw i32 0, [[SUB_3_7]] -; CHECK-NEXT: [[TMP112:%.*]] = select i1 [[TMP111]], i32 [[NEG_3_7]], i32 [[SUB_3_7]] -; CHECK-NEXT: [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP112]], [[SPEC_SELECT8_2_7]] -; CHECK-NEXT: [[TMP113:%.*]] = or i1 [[CMP12_3_7]], [[TMP110]] -; CHECK-NEXT: [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP113]], i32 7, i32 [[SPEC_SELECT_3_6]] -; CHECK-NEXT: [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP112]], i32 [[SPEC_SELECT8_2_7]] +; CHECK-NEXT: [[TMP109:%.*]] = select i1 [[TMP108]], i32 [[NEG_3_7]], i32 [[SUB_3_7]] +; CHECK-NEXT: [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP109]], [[SPEC_SELECT8_2_7]] +; CHECK-NEXT: [[TMP110:%.*]] = or i1 [[CMP12_3_7]], [[TMP107]] +; CHECK-NEXT: [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP110]], i32 7, i32 [[SPEC_SELECT_3_6]] +; CHECK-NEXT: [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP109]], i32 [[SPEC_SELECT8_2_7]] ; CHECK-NEXT: [[K:%.*]] = getelementptr inbounds [366 x i32], [366 x i32]* @l, i64 0, i64 [[INDVARS_IV]] ; CHECK-NEXT: store i32 [[SPEC_SELECT_3_7]], i32* [[K]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 diff --git a/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll b/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll --- a/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll +++ b/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll @@ -137,53 +137,23 @@ ; MAX256-NEXT: [[I6:%.*]] = fpext half [[HVAL]] to float ; MAX256-NEXT: [[I9:%.*]] = fpext half [[HVAL]] to float ; MAX256-NEXT: [[TMP0:%.*]] = insertelement <8 x float> poison, float [[I]], i32 0 -; MAX256-NEXT: [[TMP1:%.*]] = insertelement <8 x float> [[TMP0]], float [[I]], i32 1 -; MAX256-NEXT: [[TMP2:%.*]] = insertelement <8 x float> [[TMP1]], float [[I]], i32 2 -; MAX256-NEXT: [[TMP3:%.*]] = insertelement <8 x float> [[TMP2]], float [[I]], i32 3 -; MAX256-NEXT: [[TMP4:%.*]] = insertelement <8 x float> [[TMP3]], float [[I]], i32 4 -; MAX256-NEXT: [[TMP5:%.*]] = insertelement <8 x float> [[TMP4]], float [[I]], i32 5 -; MAX256-NEXT: [[TMP6:%.*]] = insertelement <8 x float> [[TMP5]], float [[I]], i32 6 -; MAX256-NEXT: [[TMP7:%.*]] = insertelement <8 x float> [[TMP6]], float [[I]], i32 7 -; MAX256-NEXT: [[TMP8:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0 -; MAX256-NEXT: [[TMP9:%.*]] = insertelement <8 x float> [[TMP8]], float [[FVAL]], i32 1 -; MAX256-NEXT: [[TMP10:%.*]] = insertelement <8 x float> [[TMP9]], float [[FVAL]], i32 2 -; MAX256-NEXT: [[TMP11:%.*]] = insertelement <8 x float> [[TMP10]], float [[FVAL]], i32 3 -; MAX256-NEXT: [[TMP12:%.*]] = insertelement <8 x float> [[TMP11]], float [[FVAL]], i32 4 -; MAX256-NEXT: [[TMP13:%.*]] = insertelement <8 x float> [[TMP12]], float [[FVAL]], i32 5 -; MAX256-NEXT: [[TMP14:%.*]] = insertelement <8 x float> [[TMP13]], float [[FVAL]], i32 6 -; MAX256-NEXT: [[TMP15:%.*]] = insertelement <8 x float> [[TMP14]], float [[FVAL]], i32 7 -; MAX256-NEXT: [[TMP16:%.*]] = fmul <8 x float> [[TMP7]], [[TMP15]] -; MAX256-NEXT: [[TMP17:%.*]] = fadd <8 x float> zeroinitializer, [[TMP16]] -; MAX256-NEXT: [[TMP18:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0 -; MAX256-NEXT: [[TMP19:%.*]] = insertelement <8 x float> [[TMP18]], float [[I3]], i32 1 -; MAX256-NEXT: [[TMP20:%.*]] = insertelement <8 x float> [[TMP19]], float [[I3]], i32 2 -; MAX256-NEXT: [[TMP21:%.*]] = insertelement <8 x float> [[TMP20]], float [[I3]], i32 3 -; MAX256-NEXT: [[TMP22:%.*]] = insertelement <8 x float> [[TMP21]], float [[I3]], i32 4 -; MAX256-NEXT: [[TMP23:%.*]] = insertelement <8 x float> [[TMP22]], float [[I3]], i32 5 -; MAX256-NEXT: [[TMP24:%.*]] = insertelement <8 x float> [[TMP23]], float [[I3]], i32 6 -; MAX256-NEXT: [[TMP25:%.*]] = insertelement <8 x float> [[TMP24]], float [[I3]], i32 7 -; MAX256-NEXT: [[TMP26:%.*]] = fmul <8 x float> [[TMP25]], [[TMP15]] -; MAX256-NEXT: [[TMP27:%.*]] = fadd <8 x float> zeroinitializer, [[TMP26]] -; MAX256-NEXT: [[TMP28:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0 -; MAX256-NEXT: [[TMP29:%.*]] = insertelement <8 x float> [[TMP28]], float [[I6]], i32 1 -; MAX256-NEXT: [[TMP30:%.*]] = insertelement <8 x float> [[TMP29]], float [[I6]], i32 2 -; MAX256-NEXT: [[TMP31:%.*]] = insertelement <8 x float> [[TMP30]], float [[I6]], i32 3 -; MAX256-NEXT: [[TMP32:%.*]] = insertelement <8 x float> [[TMP31]], float [[I6]], i32 4 -; MAX256-NEXT: [[TMP33:%.*]] = insertelement <8 x float> [[TMP32]], float [[I6]], i32 5 -; MAX256-NEXT: [[TMP34:%.*]] = insertelement <8 x float> [[TMP33]], float [[I6]], i32 6 -; MAX256-NEXT: [[TMP35:%.*]] = insertelement <8 x float> [[TMP34]], float [[I6]], i32 7 -; MAX256-NEXT: [[TMP36:%.*]] = fmul <8 x float> [[TMP35]], [[TMP15]] -; MAX256-NEXT: [[TMP37:%.*]] = fadd <8 x float> zeroinitializer, [[TMP36]] -; MAX256-NEXT: [[TMP38:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0 -; MAX256-NEXT: [[TMP39:%.*]] = insertelement <8 x float> [[TMP38]], float [[I9]], i32 1 -; MAX256-NEXT: [[TMP40:%.*]] = insertelement <8 x float> [[TMP39]], float [[I9]], i32 2 -; MAX256-NEXT: [[TMP41:%.*]] = insertelement <8 x float> [[TMP40]], float [[I9]], i32 3 -; MAX256-NEXT: [[TMP42:%.*]] = insertelement <8 x float> [[TMP41]], float [[I9]], i32 4 -; MAX256-NEXT: [[TMP43:%.*]] = insertelement <8 x float> [[TMP42]], float [[I9]], i32 5 -; MAX256-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[I9]], i32 6 -; MAX256-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[I9]], i32 7 -; MAX256-NEXT: [[TMP46:%.*]] = fmul <8 x float> [[TMP45]], [[TMP15]] -; MAX256-NEXT: [[TMP47:%.*]] = fadd <8 x float> zeroinitializer, [[TMP46]] +; MAX256-NEXT: [[SHUFFLE6:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX256-NEXT: [[TMP1:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0 +; MAX256-NEXT: [[SHUFFLE7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX256-NEXT: [[TMP2:%.*]] = fmul <8 x float> [[SHUFFLE6]], [[SHUFFLE7]] +; MAX256-NEXT: [[TMP3:%.*]] = fadd <8 x float> zeroinitializer, [[TMP2]] +; MAX256-NEXT: [[TMP4:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0 +; MAX256-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX256-NEXT: [[TMP5:%.*]] = fmul <8 x float> [[SHUFFLE]], [[SHUFFLE7]] +; MAX256-NEXT: [[TMP6:%.*]] = fadd <8 x float> zeroinitializer, [[TMP5]] +; MAX256-NEXT: [[TMP7:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0 +; MAX256-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX256-NEXT: [[TMP8:%.*]] = fmul <8 x float> [[SHUFFLE2]], [[SHUFFLE7]] +; MAX256-NEXT: [[TMP9:%.*]] = fadd <8 x float> zeroinitializer, [[TMP8]] +; MAX256-NEXT: [[TMP10:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0 +; MAX256-NEXT: [[SHUFFLE4:%.*]] = shufflevector <8 x float> [[TMP10]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX256-NEXT: [[TMP11:%.*]] = fmul <8 x float> [[SHUFFLE4]], [[SHUFFLE7]] +; MAX256-NEXT: [[TMP12:%.*]] = fadd <8 x float> zeroinitializer, [[TMP11]] ; MAX256-NEXT: switch i32 undef, label [[BB5:%.*]] [ ; MAX256-NEXT: i32 0, label [[BB2:%.*]] ; MAX256-NEXT: i32 1, label [[BB3:%.*]] @@ -196,12 +166,12 @@ ; MAX256: bb5: ; MAX256-NEXT: br label [[BB2]] ; MAX256: bb2: -; MAX256-NEXT: [[TMP48:%.*]] = phi <8 x float> [ [[TMP27]], [[BB3]] ], [ [[TMP15]], [[BB4]] ], [ [[TMP15]], [[BB5]] ], [ [[TMP15]], [[BB1]] ] -; MAX256-NEXT: [[TMP49:%.*]] = phi <8 x float> [ [[TMP37]], [[BB3]] ], [ [[TMP15]], [[BB4]] ], [ [[TMP37]], [[BB5]] ], [ [[TMP37]], [[BB1]] ] -; MAX256-NEXT: [[TMP50:%.*]] = phi <8 x float> [ [[TMP47]], [[BB3]] ], [ [[TMP47]], [[BB4]] ], [ [[TMP15]], [[BB5]] ], [ [[TMP47]], [[BB1]] ] -; MAX256-NEXT: [[TMP51:%.*]] = phi <8 x float> [ [[TMP17]], [[BB3]] ], [ [[TMP17]], [[BB4]] ], [ [[TMP17]], [[BB5]] ], [ [[TMP15]], [[BB1]] ] -; MAX256-NEXT: [[TMP52:%.*]] = extractelement <8 x float> [[TMP49]], i32 7 -; MAX256-NEXT: store float [[TMP52]], float* undef, align 4 +; MAX256-NEXT: [[TMP13:%.*]] = phi <8 x float> [ [[TMP6]], [[BB3]] ], [ [[SHUFFLE7]], [[BB4]] ], [ [[SHUFFLE7]], [[BB5]] ], [ [[SHUFFLE7]], [[BB1]] ] +; MAX256-NEXT: [[TMP14:%.*]] = phi <8 x float> [ [[TMP9]], [[BB3]] ], [ [[SHUFFLE7]], [[BB4]] ], [ [[TMP9]], [[BB5]] ], [ [[TMP9]], [[BB1]] ] +; MAX256-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], [[BB3]] ], [ [[TMP12]], [[BB4]] ], [ [[SHUFFLE7]], [[BB5]] ], [ [[TMP12]], [[BB1]] ] +; MAX256-NEXT: [[TMP16:%.*]] = phi <8 x float> [ [[TMP3]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[SHUFFLE7]], [[BB1]] ] +; MAX256-NEXT: [[TMP17:%.*]] = extractelement <8 x float> [[TMP14]], i32 7 +; MAX256-NEXT: store float [[TMP17]], float* undef, align 4 ; MAX256-NEXT: ret void ; ; MAX1024-LABEL: @phi_float32( @@ -213,53 +183,23 @@ ; MAX1024-NEXT: [[I6:%.*]] = fpext half [[HVAL]] to float ; MAX1024-NEXT: [[I9:%.*]] = fpext half [[HVAL]] to float ; MAX1024-NEXT: [[TMP0:%.*]] = insertelement <8 x float> poison, float [[I]], i32 0 -; MAX1024-NEXT: [[TMP1:%.*]] = insertelement <8 x float> [[TMP0]], float [[I]], i32 1 -; MAX1024-NEXT: [[TMP2:%.*]] = insertelement <8 x float> [[TMP1]], float [[I]], i32 2 -; MAX1024-NEXT: [[TMP3:%.*]] = insertelement <8 x float> [[TMP2]], float [[I]], i32 3 -; MAX1024-NEXT: [[TMP4:%.*]] = insertelement <8 x float> [[TMP3]], float [[I]], i32 4 -; MAX1024-NEXT: [[TMP5:%.*]] = insertelement <8 x float> [[TMP4]], float [[I]], i32 5 -; MAX1024-NEXT: [[TMP6:%.*]] = insertelement <8 x float> [[TMP5]], float [[I]], i32 6 -; MAX1024-NEXT: [[TMP7:%.*]] = insertelement <8 x float> [[TMP6]], float [[I]], i32 7 -; MAX1024-NEXT: [[TMP8:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0 -; MAX1024-NEXT: [[TMP9:%.*]] = insertelement <8 x float> [[TMP8]], float [[FVAL]], i32 1 -; MAX1024-NEXT: [[TMP10:%.*]] = insertelement <8 x float> [[TMP9]], float [[FVAL]], i32 2 -; MAX1024-NEXT: [[TMP11:%.*]] = insertelement <8 x float> [[TMP10]], float [[FVAL]], i32 3 -; MAX1024-NEXT: [[TMP12:%.*]] = insertelement <8 x float> [[TMP11]], float [[FVAL]], i32 4 -; MAX1024-NEXT: [[TMP13:%.*]] = insertelement <8 x float> [[TMP12]], float [[FVAL]], i32 5 -; MAX1024-NEXT: [[TMP14:%.*]] = insertelement <8 x float> [[TMP13]], float [[FVAL]], i32 6 -; MAX1024-NEXT: [[TMP15:%.*]] = insertelement <8 x float> [[TMP14]], float [[FVAL]], i32 7 -; MAX1024-NEXT: [[TMP16:%.*]] = fmul <8 x float> [[TMP7]], [[TMP15]] -; MAX1024-NEXT: [[TMP17:%.*]] = fadd <8 x float> zeroinitializer, [[TMP16]] -; MAX1024-NEXT: [[TMP18:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0 -; MAX1024-NEXT: [[TMP19:%.*]] = insertelement <8 x float> [[TMP18]], float [[I3]], i32 1 -; MAX1024-NEXT: [[TMP20:%.*]] = insertelement <8 x float> [[TMP19]], float [[I3]], i32 2 -; MAX1024-NEXT: [[TMP21:%.*]] = insertelement <8 x float> [[TMP20]], float [[I3]], i32 3 -; MAX1024-NEXT: [[TMP22:%.*]] = insertelement <8 x float> [[TMP21]], float [[I3]], i32 4 -; MAX1024-NEXT: [[TMP23:%.*]] = insertelement <8 x float> [[TMP22]], float [[I3]], i32 5 -; MAX1024-NEXT: [[TMP24:%.*]] = insertelement <8 x float> [[TMP23]], float [[I3]], i32 6 -; MAX1024-NEXT: [[TMP25:%.*]] = insertelement <8 x float> [[TMP24]], float [[I3]], i32 7 -; MAX1024-NEXT: [[TMP26:%.*]] = fmul <8 x float> [[TMP25]], [[TMP15]] -; MAX1024-NEXT: [[TMP27:%.*]] = fadd <8 x float> zeroinitializer, [[TMP26]] -; MAX1024-NEXT: [[TMP28:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0 -; MAX1024-NEXT: [[TMP29:%.*]] = insertelement <8 x float> [[TMP28]], float [[I6]], i32 1 -; MAX1024-NEXT: [[TMP30:%.*]] = insertelement <8 x float> [[TMP29]], float [[I6]], i32 2 -; MAX1024-NEXT: [[TMP31:%.*]] = insertelement <8 x float> [[TMP30]], float [[I6]], i32 3 -; MAX1024-NEXT: [[TMP32:%.*]] = insertelement <8 x float> [[TMP31]], float [[I6]], i32 4 -; MAX1024-NEXT: [[TMP33:%.*]] = insertelement <8 x float> [[TMP32]], float [[I6]], i32 5 -; MAX1024-NEXT: [[TMP34:%.*]] = insertelement <8 x float> [[TMP33]], float [[I6]], i32 6 -; MAX1024-NEXT: [[TMP35:%.*]] = insertelement <8 x float> [[TMP34]], float [[I6]], i32 7 -; MAX1024-NEXT: [[TMP36:%.*]] = fmul <8 x float> [[TMP35]], [[TMP15]] -; MAX1024-NEXT: [[TMP37:%.*]] = fadd <8 x float> zeroinitializer, [[TMP36]] -; MAX1024-NEXT: [[TMP38:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0 -; MAX1024-NEXT: [[TMP39:%.*]] = insertelement <8 x float> [[TMP38]], float [[I9]], i32 1 -; MAX1024-NEXT: [[TMP40:%.*]] = insertelement <8 x float> [[TMP39]], float [[I9]], i32 2 -; MAX1024-NEXT: [[TMP41:%.*]] = insertelement <8 x float> [[TMP40]], float [[I9]], i32 3 -; MAX1024-NEXT: [[TMP42:%.*]] = insertelement <8 x float> [[TMP41]], float [[I9]], i32 4 -; MAX1024-NEXT: [[TMP43:%.*]] = insertelement <8 x float> [[TMP42]], float [[I9]], i32 5 -; MAX1024-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[I9]], i32 6 -; MAX1024-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[I9]], i32 7 -; MAX1024-NEXT: [[TMP46:%.*]] = fmul <8 x float> [[TMP45]], [[TMP15]] -; MAX1024-NEXT: [[TMP47:%.*]] = fadd <8 x float> zeroinitializer, [[TMP46]] +; MAX1024-NEXT: [[SHUFFLE6:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX1024-NEXT: [[TMP1:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0 +; MAX1024-NEXT: [[SHUFFLE7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX1024-NEXT: [[TMP2:%.*]] = fmul <8 x float> [[SHUFFLE6]], [[SHUFFLE7]] +; MAX1024-NEXT: [[TMP3:%.*]] = fadd <8 x float> zeroinitializer, [[TMP2]] +; MAX1024-NEXT: [[TMP4:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0 +; MAX1024-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX1024-NEXT: [[TMP5:%.*]] = fmul <8 x float> [[SHUFFLE]], [[SHUFFLE7]] +; MAX1024-NEXT: [[TMP6:%.*]] = fadd <8 x float> zeroinitializer, [[TMP5]] +; MAX1024-NEXT: [[TMP7:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0 +; MAX1024-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX1024-NEXT: [[TMP8:%.*]] = fmul <8 x float> [[SHUFFLE2]], [[SHUFFLE7]] +; MAX1024-NEXT: [[TMP9:%.*]] = fadd <8 x float> zeroinitializer, [[TMP8]] +; MAX1024-NEXT: [[TMP10:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0 +; MAX1024-NEXT: [[SHUFFLE4:%.*]] = shufflevector <8 x float> [[TMP10]], <8 x float> poison, <8 x i32> zeroinitializer +; MAX1024-NEXT: [[TMP11:%.*]] = fmul <8 x float> [[SHUFFLE4]], [[SHUFFLE7]] +; MAX1024-NEXT: [[TMP12:%.*]] = fadd <8 x float> zeroinitializer, [[TMP11]] ; MAX1024-NEXT: switch i32 undef, label [[BB5:%.*]] [ ; MAX1024-NEXT: i32 0, label [[BB2:%.*]] ; MAX1024-NEXT: i32 1, label [[BB3:%.*]] @@ -272,12 +212,12 @@ ; MAX1024: bb5: ; MAX1024-NEXT: br label [[BB2]] ; MAX1024: bb2: -; MAX1024-NEXT: [[TMP48:%.*]] = phi <8 x float> [ [[TMP27]], [[BB3]] ], [ [[TMP15]], [[BB4]] ], [ [[TMP15]], [[BB5]] ], [ [[TMP15]], [[BB1]] ] -; MAX1024-NEXT: [[TMP49:%.*]] = phi <8 x float> [ [[TMP37]], [[BB3]] ], [ [[TMP15]], [[BB4]] ], [ [[TMP37]], [[BB5]] ], [ [[TMP37]], [[BB1]] ] -; MAX1024-NEXT: [[TMP50:%.*]] = phi <8 x float> [ [[TMP47]], [[BB3]] ], [ [[TMP47]], [[BB4]] ], [ [[TMP15]], [[BB5]] ], [ [[TMP47]], [[BB1]] ] -; MAX1024-NEXT: [[TMP51:%.*]] = phi <8 x float> [ [[TMP17]], [[BB3]] ], [ [[TMP17]], [[BB4]] ], [ [[TMP17]], [[BB5]] ], [ [[TMP15]], [[BB1]] ] -; MAX1024-NEXT: [[TMP52:%.*]] = extractelement <8 x float> [[TMP49]], i32 7 -; MAX1024-NEXT: store float [[TMP52]], float* undef, align 4 +; MAX1024-NEXT: [[TMP13:%.*]] = phi <8 x float> [ [[TMP6]], [[BB3]] ], [ [[SHUFFLE7]], [[BB4]] ], [ [[SHUFFLE7]], [[BB5]] ], [ [[SHUFFLE7]], [[BB1]] ] +; MAX1024-NEXT: [[TMP14:%.*]] = phi <8 x float> [ [[TMP9]], [[BB3]] ], [ [[SHUFFLE7]], [[BB4]] ], [ [[TMP9]], [[BB5]] ], [ [[TMP9]], [[BB1]] ] +; MAX1024-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], [[BB3]] ], [ [[TMP12]], [[BB4]] ], [ [[SHUFFLE7]], [[BB5]] ], [ [[TMP12]], [[BB1]] ] +; MAX1024-NEXT: [[TMP16:%.*]] = phi <8 x float> [ [[TMP3]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[SHUFFLE7]], [[BB1]] ] +; MAX1024-NEXT: [[TMP17:%.*]] = extractelement <8 x float> [[TMP14]], i32 7 +; MAX1024-NEXT: store float [[TMP17]], float* undef, align 4 ; MAX1024-NEXT: ret void ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll --- a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll @@ -43,8 +43,8 @@ define void @test2() { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> ) -; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <4 x i32> poison, [[TMP0]] +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <4 x i32> undef, [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP2]], i32 77) ; CHECK-NEXT: [[E:%.*]] = icmp ugt i32 [[TMP3]], 1