diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -164,14 +164,6 @@ "slp-max-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for operand reordering scores")); -// The Look-ahead heuristic goes through the users of the bundle to calculate -// the users cost in getExternalUsesCost(). To avoid compilation time increase -// we limit the number of users visited to this value. -static cl::opt LookAheadUsersBudget( - "slp-look-ahead-users-budget", cl::init(2), cl::Hidden, - cl::desc("The maximum number of users to visit while visiting the " - "predecessors. This prevents compilation time increase.")); - static cl::opt ViewSLPTree("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz")); @@ -1047,14 +1039,15 @@ static const int ScoreUndef = 1; /// Score for failing to find a decent match. static const int ScoreFail = 0; - /// User exteranl to the vectorized code. - static const int ExternalUseCost = 1; - /// The user is internal but in a different lane. - static const int UserInDiffLaneCost = ExternalUseCost; + /// Score if all users are vectorized. + static const int ScoreAllUserVectorized = 1; /// \returns the score of placing \p V1 and \p V2 in consecutive lanes. + /// Also, checks if \p V1 and \p V2 are compatible with instructions in \p + /// MainAltOps. static int getShallowScore(Value *V1, Value *V2, const DataLayout &DL, - ScalarEvolution &SE, int NumLanes) { + ScalarEvolution &SE, int NumLanes, + ArrayRef MainAltOps) { if (V1 == V2) return VLOperands::ScoreSplat; @@ -1067,7 +1060,7 @@ Optional Dist = getPointersDiff( LI1->getType(), LI1->getPointerOperand(), LI2->getType(), LI2->getPointerOperand(), DL, SE, /*StrictCheck=*/true); - if (!Dist) + if (!Dist || *Dist == 0) return VLOperands::ScoreFail; // The distance is too large - still may be profitable to use masked // loads/gathers. @@ -1109,12 +1102,16 @@ int Dist = Idx2 - Idx1; // The distance is too large - still may be profitable to use // shuffles. + if (std::abs(Dist) == 0) + return VLOperands::ScoreSplat; if (std::abs(Dist) > NumLanes / 2) - return VLOperands::ScoreAltOpcodes; + return VLOperands::ScoreSameOpcode; return (Dist > 0) ? VLOperands::ScoreConsecutiveExtracts : VLOperands::ScoreReversedExtracts; } + return VLOperands::ScoreAltOpcodes; } + return VLOperands::ScoreFail; } auto *I1 = dyn_cast(V1); @@ -1122,10 +1119,19 @@ if (I1 && I2) { if (I1->getParent() != I2->getParent()) return VLOperands::ScoreFail; - InstructionsState S = getSameOpcode({I1, I2}); + SmallVector Ops(MainAltOps.begin(), MainAltOps.end()); + Ops.push_back(I1); + Ops.push_back(I2); + InstructionsState S = getSameOpcode(Ops); // Note: Only consider instructions with <= 2 operands to avoid // complexity explosion. - if (S.getOpcode() && S.MainOp->getNumOperands() <= 2) + if (S.getOpcode() && + (S.MainOp->getNumOperands() <= 2 || !MainAltOps.empty() || + !S.isAltShuffle()) && + all_of(Ops, [&S](Value *V) { + return cast(V)->getNumOperands() == + S.MainOp->getNumOperands(); + })) return S.isAltShuffle() ? VLOperands::ScoreAltOpcodes : VLOperands::ScoreSameOpcode; } @@ -1136,68 +1142,59 @@ return VLOperands::ScoreFail; } - /// Holds the values and their lanes that are taking part in the look-ahead - /// score calculation. This is used in the external uses cost calculation. - /// Need to hold all the lanes in case of splat/broadcast at least to - /// correctly check for the use in the different lane. - SmallDenseMap> InLookAheadValues; - - /// \returns the additional cost due to uses of \p LHS and \p RHS that are - /// either external to the vectorized code, or require shuffling. - int getExternalUsesCost(const std::pair &LHS, - const std::pair &RHS) { - int Cost = 0; - std::array, 2> Values = {{LHS, RHS}}; - for (int Idx = 0, IdxE = Values.size(); Idx != IdxE; ++Idx) { - Value *V = Values[Idx].first; - if (isa(V)) { - // Since this is a function pass, it doesn't make semantic sense to - // walk the users of a subclass of Constant. The users could be in - // another function, or even another module that happens to be in - // the same LLVMContext. + /// \param Lane lane of the operands under analysis. + /// \param OpIdx operand index in \p Lane lane we're looking the best + /// candidate for. + /// \param Idx operand index of the current candidate value. + /// \returns The additional score due to possible broadcasting of the + /// elements in the lane. It is more profitable to have power-of-2 unique + /// elements in the lane, it will be vectorized with higher probability + /// after removing duplicates. Currently the SLP vectorizer supports only + /// vectorization of the power-of-2 number of unique scalars. + int getSplatScore(unsigned Lane, unsigned OpIdx, unsigned Idx) const { + Value *IdxLaneV = getData(Idx, Lane).V; + if (!isa(IdxLaneV) || IdxLaneV == getData(OpIdx, Lane).V) + return 0; + SmallPtrSet Uniques; + for (unsigned Ln = 0, E = getNumLanes(); Ln < E; ++Ln) { + if (Ln == Lane) continue; - } - - // Calculate the absolute lane, using the minimum relative lane of LHS - // and RHS as base and Idx as the offset. - int Ln = std::min(LHS.second, RHS.second) + Idx; - assert(Ln >= 0 && "Bad lane calculation"); - unsigned UsersBudget = LookAheadUsersBudget; - for (User *U : V->users()) { - if (const TreeEntry *UserTE = R.getTreeEntry(U)) { - // The user is in the VectorizableTree. Check if we need to insert. - int UserLn = UserTE->findLaneForValue(U); - assert(UserLn >= 0 && "Bad lane"); - // If the values are different, check just the line of the current - // value. If the values are the same, need to add UserInDiffLaneCost - // only if UserLn does not match both line numbers. - if ((LHS.first != RHS.first && UserLn != Ln) || - (LHS.first == RHS.first && UserLn != LHS.second && - UserLn != RHS.second)) { - Cost += UserInDiffLaneCost; - break; - } - } else { - // Check if the user is in the look-ahead code. - auto It2 = InLookAheadValues.find(U); - if (It2 != InLookAheadValues.end()) { - // The user is in the look-ahead code. Check the lane. - if (!It2->getSecond().contains(Ln)) { - Cost += UserInDiffLaneCost; - break; - } - } else { - // The user is neither in SLP tree nor in the look-ahead code. - Cost += ExternalUseCost; - break; - } - } - // Limit the number of visited uses to cap compilation time. - if (--UsersBudget == 0) - break; - } - } - return Cost; + Value *OpIdxLnV = getData(OpIdx, Ln).V; + if (!isa(OpIdxLnV)) + return 0; + Uniques.insert(OpIdxLnV); + } + int UniquesCount = Uniques.size(); + int UniquesCntWithIdxLaneV = + Uniques.contains(IdxLaneV) ? UniquesCount : UniquesCount + 1; + Value *OpIdxLaneV = getData(OpIdx, Lane).V; + int UniquesCntWithOpIdxLaneV = + Uniques.contains(OpIdxLaneV) ? UniquesCount : UniquesCount + 1; + if (UniquesCntWithIdxLaneV == UniquesCntWithOpIdxLaneV) + return 0; + return (PowerOf2Ceil(UniquesCntWithOpIdxLaneV) - + UniquesCntWithOpIdxLaneV) - + (PowerOf2Ceil(UniquesCntWithIdxLaneV) - UniquesCntWithIdxLaneV); + } + + /// \param Lane lane of the operands under analysis. + /// \param OpIdx operand index in \p Lane lane we're looking the best + /// candidate for. + /// \param Idx operand index of the current candidate value. + /// \returns The additional score for the scalar which users are all + /// vectorized. + int getExternalUseScore(unsigned Lane, unsigned OpIdx, unsigned Idx) const { + Value *IdxLaneV = getData(Idx, Lane).V; + Value *OpIdxLaneV = getData(OpIdx, Lane).V; + if (isVectorLikeInstWithConstOps(IdxLaneV) && + isVectorLikeInstWithConstOps(OpIdxLaneV)) + return VLOperands::ScoreAllUserVectorized; + auto *IdxLaneI = dyn_cast(IdxLaneV); + if (!IdxLaneI || !isa(OpIdxLaneV)) + return 0; + return R.areAllUsersVectorized(IdxLaneI, None) + ? VLOperands::ScoreAllUserVectorized + : 0; } /// Go through the operands of \p LHS and \p RHS recursively until \p @@ -1221,18 +1218,12 @@ /// Look-ahead SLP: Auto-vectorization in the presence of commutative /// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha, /// Luís F. W. Góes - int getScoreAtLevelRec(const std::pair &LHS, - const std::pair &RHS, int CurrLevel, - int MaxLevel) { + int getScoreAtLevelRec(Value *LHS, Value *RHS, int CurrLevel, int MaxLevel, + ArrayRef MainAltOps) { - Value *V1 = LHS.first; - Value *V2 = RHS.first; // Get the shallow score of V1 and V2. - int ShallowScoreAtThisLevel = std::max( - (int)ScoreFail, getShallowScore(V1, V2, DL, SE, getNumLanes()) - - getExternalUsesCost(LHS, RHS)); - int Lane1 = LHS.second; - int Lane2 = RHS.second; + int ShallowScoreAtThisLevel = + getShallowScore(LHS, RHS, DL, SE, getNumLanes(), MainAltOps); // If reached MaxLevel, // or if V1 and V2 are not instructions, @@ -1240,20 +1231,17 @@ // or if they are not consecutive, // or if profitable to vectorize loads or extractelements, early return // the current cost. - auto *I1 = dyn_cast(V1); - auto *I2 = dyn_cast(V2); + auto *I1 = dyn_cast(LHS); + auto *I2 = dyn_cast(RHS); if (CurrLevel == MaxLevel || !(I1 && I2) || I1 == I2 || ShallowScoreAtThisLevel == VLOperands::ScoreFail || (((isa(I1) && isa(I2)) || + (I1->getNumOperands() > 2 && I2->getNumOperands() > 2) || (isa(I1) && isa(I2))) && ShallowScoreAtThisLevel)) return ShallowScoreAtThisLevel; assert(I1 && I2 && "Should have early exited."); - // Keep track of in-tree values for determining the external-use cost. - InLookAheadValues[V1].insert(Lane1); - InLookAheadValues[V2].insert(Lane2); - // Contains the I2 operand indexes that got matched with I1 operands. SmallSet Op2Used; @@ -1276,9 +1264,9 @@ if (Op2Used.count(OpIdx2)) continue; // Recursively calculate the cost at each level - int TmpScore = getScoreAtLevelRec({I1->getOperand(OpIdx1), Lane1}, - {I2->getOperand(OpIdx2), Lane2}, - CurrLevel + 1, MaxLevel); + int TmpScore = + getScoreAtLevelRec(I1->getOperand(OpIdx1), I2->getOperand(OpIdx2), + CurrLevel + 1, MaxLevel, None); // Look for the best score. if (TmpScore > VLOperands::ScoreFail && TmpScore > MaxTmpScore) { MaxTmpScore = TmpScore; @@ -1300,18 +1288,25 @@ /// score. This helps break ties in an informed way when we cannot decide on /// the order of the operands by just considering the immediate /// predecessors. - int getLookAheadScore(const std::pair &LHS, - const std::pair &RHS) { - InLookAheadValues.clear(); - return getScoreAtLevelRec(LHS, RHS, 1, LookAheadMaxDepth); + int getLookAheadScore(Value *LHS, Value *RHS, + ArrayRef MainAltOps) { + return getScoreAtLevelRec(LHS, RHS, 1, LookAheadMaxDepth, MainAltOps); } + /// Best defined scores per lanes between the passes. Used to choose the + /// best operand (with the highest score) between the passes. + /// The key - {Operand Index, Lane}. + /// The value - the best score between the passes for the lane and the + /// operand. + SmallDenseMap, unsigned, 8> + BestScoresPerLanes; + // Search all operands in Ops[*][Lane] for the one that matches best // Ops[OpIdx][LastLane] and return its opreand index. // If no good match can be found, return None. - Optional - getBestOperand(unsigned OpIdx, int Lane, int LastLane, - ArrayRef ReorderingModes) { + Optional getBestOperand(unsigned OpIdx, int Lane, int LastLane, + ArrayRef ReorderingModes, + ArrayRef MainAltOps) { unsigned NumOperands = getNumOperands(); // The operand of the previous lane at OpIdx. @@ -1319,6 +1314,8 @@ // Our strategy mode for OpIdx. ReorderingMode RMode = ReorderingModes[OpIdx]; + if (RMode == ReorderingMode::Failed) + return None; // The linearized opcode of the operand at OpIdx, Lane. bool OpIdxAPO = getData(OpIdx, Lane).APO; @@ -1330,7 +1327,15 @@ Optional Idx = None; unsigned Score = 0; } BestOp; - + BestOp.Score = + BestScoresPerLanes.try_emplace(std::make_pair(OpIdx, Lane), 0) + .first->second; + + // Track if the operand must be marked as used. If the operand is set to + // Score 1 explicitly (because of non power-of-2 unique scalars, we may + // want to reestimate the operands again on the following iterations). + bool IsUsed = + RMode == ReorderingMode::Splat || RMode == ReorderingMode::Constant; // Iterate through all unused operands and look for the best. for (unsigned Idx = 0; Idx != NumOperands; ++Idx) { // Get the operand at Idx and Lane. @@ -1356,11 +1361,29 @@ bool LeftToRight = Lane > LastLane; Value *OpLeft = (LeftToRight) ? OpLastLane : Op; Value *OpRight = (LeftToRight) ? Op : OpLastLane; - unsigned Score = - getLookAheadScore({OpLeft, LastLane}, {OpRight, Lane}); - if (Score > BestOp.Score) { + int Score = getLookAheadScore(OpLeft, OpRight, MainAltOps); + if (Score) { + int SplatScore = getSplatScore(Lane, OpIdx, Idx); + if (Score <= -SplatScore) { + // Set the minimum score for splat-like sequence to avoid setting + // failed state. + Score = 1; + } else { + Score += SplatScore; + // Scale score to see the difference between different operands + // and similar operands but all vectorized/not all vectorized + // uses. It does not affect actual selection of the best + // compatible operand in general, just allows to select the + // operand with all vectorized uses. + Score *= 10; + Score += getExternalUseScore(Lane, OpIdx, Idx); + IsUsed = true; + } + } + if (Score > static_cast(BestOp.Score)) { BestOp.Idx = Idx; BestOp.Score = Score; + BestScoresPerLanes[std::make_pair(OpIdx, Lane)] = Score; } break; } @@ -1369,12 +1392,12 @@ BestOp.Idx = Idx; break; case ReorderingMode::Failed: - return None; + llvm_unreachable("Not expected Failed reordering mode."); } } if (BestOp.Idx) { - getData(BestOp.Idx.getValue(), Lane).IsUsed = true; + getData(BestOp.Idx.getValue(), Lane).IsUsed = IsUsed; return BestOp.Idx; } // If we could not find a good match return None. @@ -1691,6 +1714,10 @@ // rest of the lanes. We are visiting the nodes in a circular fashion, // using FirstLane as the center point and increasing the radius // distance. + SmallVector> MainAltOps(NumOperands); + for (unsigned I = 0; I < NumOperands; ++I) + MainAltOps[I].push_back(getData(I, FirstLane).V); + for (unsigned Distance = 1; Distance != NumLanes; ++Distance) { // Visit the lane on the right and then the lane on the left. for (int Direction : {+1, -1}) { @@ -1703,8 +1730,8 @@ // Look for a good match for each operand. for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) { // Search for the operand that matches SortedOps[OpIdx][Lane-1]. - Optional BestIdx = - getBestOperand(OpIdx, Lane, LastLane, ReorderingModes); + Optional BestIdx = getBestOperand( + OpIdx, Lane, LastLane, ReorderingModes, MainAltOps[OpIdx]); // By not selecting a value, we allow the operands that follow to // select a better matching value. We will get a non-null value in // the next run of getBestOperand(). @@ -1718,6 +1745,14 @@ // Enable the second pass. StrategyFailed = true; } + // Try to get the alternate opcode and follow it during analysis. + if (MainAltOps[OpIdx].size() != 2) { + OperandData &AltOp = getData(OpIdx, Lane); + InstructionsState OpS = + getSameOpcode({MainAltOps[OpIdx].front(), AltOp.V}); + if (OpS.getOpcode() && OpS.isAltShuffle()) + MainAltOps[OpIdx].push_back(AltOp.V); + } } } } @@ -4494,7 +4529,9 @@ ArrayRef VectorizedVals) const { return (I->hasOneUse() && is_contained(VectorizedVals, I)) || all_of(I->users(), [this](User *U) { - return ScalarToTreeEntry.count(U) > 0 || MustGather.contains(U); + return ScalarToTreeEntry.count(U) > 0 || + isVectorLikeInstWithConstOps(U) || + (isa(U) && MustGather.contains(U)); }); } @@ -8434,7 +8471,9 @@ if (R.isTreeTinyAndNotFullyVectorizable()) continue; R.reorderTopToBottom(); - R.reorderBottomToTop(); + // TODO: add support for more kinds of the instructions here. + R.reorderBottomToTop( + all_of(Ops, [](Value *V) { return isa(V); })); R.buildExternalUses(); R.computeMinimumValueSizes(); diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll @@ -172,12 +172,11 @@ ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP4]], [[TMP3]] -; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP3_31:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = xor <2 x i32> [[V0]], [[V1]] +; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i32> [[TMP4]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[SHUFFLE]], [[TMP6]] +; CHECK-NEXT: [[TMP3_31:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP8]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[TMP3_31]] ; %v0.0 = extractelement <2 x i32> %v0, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll @@ -172,12 +172,11 @@ ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP4]], [[TMP3]] -; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP3_31:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = xor <2 x i32> [[V0]], [[V1]] +; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i32> [[TMP4]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[SHUFFLE]], [[TMP6]] +; CHECK-NEXT: [[TMP3_31:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP8]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[TMP3_31]] ; %v0.0 = extractelement <2 x i32> %v0, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -slp-vectorizer -slp-min-tree-size=2 -slp-threshold=-1000 -slp-max-look-ahead-depth=1 -slp-look-ahead-users-budget=1 -slp-schedule-budget=27 -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +; RUN: opt < %s -slp-vectorizer -slp-min-tree-size=2 -slp-threshold=-1000 -slp-max-look-ahead-depth=1 -slp-schedule-budget=27 -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s define void @exceed(double %0, double %1) { ; CHECK-LABEL: @exceed( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll @@ -42,10 +42,10 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[T15]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[T40]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[T27]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T40]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T47]], i32 3 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> , i32 [[T9]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[T48]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[T47]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[T40]], i32 3 ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll @@ -42,10 +42,10 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[T15]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[T40]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[T27]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T40]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T47]], i32 3 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> , i32 [[T9]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[T48]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[T47]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[T40]], i32 3 ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32>