diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6878,8 +6878,9 @@ } /// Check if two insertelement instructions are from the same buildvector. -static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU, - InsertElementInst *V) { +static bool areTwoInsertFromSameBuildVector( + InsertElementInst *VU, InsertElementInst *V, + function_ref GetBaseOperand) { // Instructions must be from the same basic blocks. if (VU->getParent() != V->getParent()) return false; @@ -6906,14 +6907,14 @@ getInsertIndex(IE1).value_or(Idx2) == Idx2) IE1 = nullptr; else - IE1 = dyn_cast(IE1->getOperand(0)); + IE1 = dyn_cast_or_null(GetBaseOperand(IE1)); } if (IE2) { if ((IE2 != V && !IE2->hasOneUse()) || getInsertIndex(IE2).value_or(Idx1) == Idx1) IE2 = nullptr; else - IE2 = dyn_cast(IE2->getOperand(0)); + IE2 = dyn_cast_or_null(GetBaseOperand(IE2)); } } while (IE1 || IE2); return false; @@ -7117,12 +7118,18 @@ Optional InsertIdx = getInsertIndex(VU); if (InsertIdx) { const TreeEntry *ScalarTE = getTreeEntry(EU.Scalar); - auto *It = - find_if(FirstUsers, - [VU](const std::pair &Pair) { - return areTwoInsertFromSameBuildVector( - VU, cast(Pair.first)); - }); + auto *It = find_if( + FirstUsers, + [this, VU](const std::pair &Pair) { + return areTwoInsertFromSameBuildVector( + VU, cast(Pair.first), + [this](InsertElementInst *II) -> Value * { + Value *Op0 = II->getOperand(0); + if (getTreeEntry(II) && !getTreeEntry(Op0)) + return nullptr; + return Op0; + }); + }); int VecId = -1; if (It == FirstUsers.end()) { (void)ShuffleMasks.emplace_back(); @@ -8590,7 +8597,9 @@ find_if(ShuffledInserts, [VU](const ShuffledInsertData &Data) { // Checks if 2 insertelements are from the same buildvector. InsertElementInst *VecInsert = Data.InsertElements.front(); - return areTwoInsertFromSameBuildVector(VU, VecInsert); + return areTwoInsertFromSameBuildVector( + VU, VecInsert, + [](InsertElementInst *II) { return II->getOperand(0); }); }); unsigned Idx = *InsertIdx; if (It == ShuffledInserts.end()) { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll @@ -24,6 +24,7 @@ ; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[T9]], [[T15]] ; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]] ; CHECK-NEXT: [[T31:%.*]] = mul nsw i32 [[T30]], 4433 +; CHECK-NEXT: [[T32:%.*]] = mul nsw i32 [[T27]], 6270 ; CHECK-NEXT: [[T34:%.*]] = mul nsw i32 [[T29]], -15137 ; CHECK-NEXT: [[T37:%.*]] = add nsw i32 [[T25]], [[T11]] ; CHECK-NEXT: [[T38:%.*]] = add nsw i32 [[T17]], [[T5]] @@ -33,22 +34,20 @@ ; CHECK-NEXT: [[T42:%.*]] = mul nsw i32 [[T17]], 16819 ; CHECK-NEXT: [[T47:%.*]] = mul nsw i32 [[T37]], -16069 ; CHECK-NEXT: [[T48:%.*]] = mul nsw i32 [[T38]], -3196 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[T27]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[T47]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[T15]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T40]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> , i32 [[T40]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[T9]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[T48]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[TMP11]], <8 x i32> [[TMP12]], <8 x i32> +; CHECK-NEXT: [[T49:%.*]] = add nsw i32 [[T40]], [[T47]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[T15]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[T40]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[T9]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[T48]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[T67:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T32]], i32 2 +; CHECK-NEXT: [[T68:%.*]] = insertelement <8 x i32> [[T67]], i32 [[T49]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP7]], <8 x i32> ; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6 -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i32> [[T71]], <8 x i32> [[TMP12]], <8 x i32> -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[TMP13]], +; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7 +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], ; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>* ; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4 ; CHECK-NEXT: ret void