diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3049,6 +3049,23 @@
                                  const std::unique_ptr &TE) {
     if (Optional CurrentOrder =
             getReorderingData(*TE.get(), /*TopToBottom=*/true)) {
+      // Do not record an order for nodes used in the alt opcode vectorization;
+      // it is better to reorder them during the bottom-to-top stage.
+      // Walk up the chain of users for as long as each entry has exactly one
+      // user edge, visiting at most RecursionMaxDepth entries.
+      unsigned Cnt = 0;
+      const TreeEntry *UserTE = TE.get();
+      while (UserTE && Cnt < RecursionMaxDepth) {
+        if (UserTE->UserTreeIndices.size() != 1)
+          break;
+        // Exactly one user edge exists here, so back() is that sole user.
+        const TreeEntry *SoleUser = UserTE->UserTreeIndices.back().UserTE;
+        if (SoleUser->State == TreeEntry::Vectorize &&
+            SoleUser->isAltShuffle() && SoleUser->Idx != 0)
+          return;
+        UserTE = SoleUser;
+        ++Cnt;
+      }
       VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
       if (TE->State != TreeEntry::Vectorize)
         GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reordered-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reordered-list.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reordered-list.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reordered-list.ll
@@ -14,10 +14,9 @@
 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
 ; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x i32>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <2 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x i32>
 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[ARRAYIDX10]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> [[SHUFFLE]], <2 x double>* [[TMP7]], align 8
+; CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8
 ; CHECK-NEXT: ret void
 ;
 entry: