Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3780,6 +3780,43 @@ (TopToBottom && isa(TE.getMainOp()))) && !TE.isAltShuffle()) return TE.ReorderIndices; + if (TE.State == TreeEntry::Vectorize && (isa(TE.getMainOp()))) { + auto PHICompare = [](llvm::Value *V1, llvm::Value *V2) { + if (!V1->user_empty() && !V2->user_empty()) { + auto *FirstUserOfPhi1 = cast(*V1->user_begin()); + auto *FirstUserOfPhi2 = cast(*V2->user_begin()); + if (FirstUserOfPhi1->getParent() == FirstUserOfPhi2->getParent()) { + if (auto *IE1 = dyn_cast(FirstUserOfPhi1)) + if (auto *IE2 = dyn_cast(FirstUserOfPhi2)) + return *getInsertIndex(IE1) < *getInsertIndex(IE2); + + if (auto *EE1 = dyn_cast(FirstUserOfPhi1)) + if (auto *EE2 = dyn_cast(FirstUserOfPhi2)) + return *getExtractIndex(EE1) < *getExtractIndex(EE2); + } + } + return false; + }; + auto IsIdentityOrder = [](const OrdersType &Order) { + for (unsigned Idx : seq(0, Order.size())) + if (Idx != Order[Idx]) + return false; + return true; + }; + DenseMap PhiToId; + SmallVector Phis; + OrdersType ResOrder(TE.Scalars.size()); + for (unsigned Id = 0, Sz = TE.Scalars.size(); Id < Sz; ++Id) { + PhiToId[TE.Scalars[Id]] = Id; + Phis.push_back(TE.Scalars[Id]); + } + llvm::sort(Phis, PHICompare); + for (unsigned Id = 0, Sz = Phis.size(); Id < Sz; ++Id) + ResOrder[Id] = PhiToId[Phis[Id]]; + if (IsIdentityOrder(ResOrder)) + return None; + return ResOrder; + } if (TE.State == TreeEntry::NeedToGather) { // TODO: add analysis of other gather nodes with extractelement // instructions and other values/instructions, not only undefs. @@ -3857,6 +3894,9 @@ // their ordering. DenseMap GathersToOrders; + // Phi nodes can have preferred ordering based on their result users + DenseMap PhisToOrders; + // AltShuffles can also have a preferred ordering that leads to fewer // instructions, e.g., the addsub instruction in x86. DenseMap AltShufflesToOrders; @@ -3871,7 +3911,7 @@ // extracts. for_each(VectorizableTree, [this, &TTIRef, &VFToOrderedEntries, &GathersToOrders, &ExternalUserReorderMap, - &AltShufflesToOrders]( + &AltShufflesToOrders, &PhisToOrders]( const std::unique_ptr &TE) { // Look for external users that will probably be vectorized. SmallVector ExternalUserReorderIndices = @@ -3928,6 +3968,8 @@ VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get()); if (TE->State != TreeEntry::Vectorize || !TE->ReuseShuffleIndices.empty()) GathersToOrders.try_emplace(TE.get(), *CurrentOrder); + if (TE->State == TreeEntry::Vectorize && isa(TE->getMainOp())) + PhisToOrders.try_emplace(TE.get(), *CurrentOrder); } }); @@ -3953,8 +3995,8 @@ if (!OpTE->ReuseShuffleIndices.empty() && !GathersToOrders.count(OpTE)) continue; // Count number of orders uses. - const auto &Order = [OpTE, &GathersToOrders, - &AltShufflesToOrders]() -> const OrdersType & { + const auto &Order = [OpTE, &GathersToOrders, &AltShufflesToOrders, + &PhisToOrders]() -> const OrdersType & { if (OpTE->State == TreeEntry::NeedToGather || !OpTE->ReuseShuffleIndices.empty()) { auto It = GathersToOrders.find(OpTE); @@ -3966,6 +4008,12 @@ if (It != AltShufflesToOrders.end()) return It->second; } + if (OpTE->State == TreeEntry::Vectorize && + isa(OpTE->getMainOp())) { + auto It = PhisToOrders.find(OpTE); + if (It != PhisToOrders.end()) + return It->second; + } return OpTE->ReorderIndices; }(); // First consider the order of the external scalar users. Index: llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll =================================================================== --- llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll +++ llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll @@ -63,8 +63,8 @@ ; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x half> [[IN1]], i64 1 ; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1]], i64 2 ; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A1]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A0]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A1]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[A2]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[A3]], i32 1 ; CHECK-NEXT: br i1 [[CMP:%.*]], label [[BB1:%.*]], label [[BB0:%.*]] @@ -73,15 +73,15 @@ ; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x half> [[IN2]], i64 1 ; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2]], i64 2 ; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B1]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B0]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B1]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x half> poison, half [[B2]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x half> [[TMP6]], half [[B3]], i32 1 ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x half> [ [[TMP1]], %entry ], [ [[TMP5]], %bb0 ] ; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x half> [ [[TMP3]], %entry ], [ [[TMP7]], %bb0 ] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x half> [[TMP9]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x i32> ; CHECK-NEXT: ret <4 x half> [[TMP12]]