diff --git a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp --- a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp @@ -592,7 +592,7 @@ Value *A = nullptr, *B = nullptr; MaxMinT m_MaxMin(m_Value(A), m_Value(B)); for (unsigned int i = 0; i < 2; ++i) { - if (match(LHS, m_MaxMin)) { + if (LHS->getNumUses() <= 2 && match(LHS, m_MaxMin)) { const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B); const SCEV *RHSExpr = SE->getSCEV(RHS); for (unsigned int j = 0; j < 2; ++j) { @@ -610,6 +610,12 @@ std::swap(AExpr, RHSExpr); } + // The optimization is profitable only if LHS can be removed in the end. + // In other words LHS should be used (directly or indirectly) by I only. + for (User *U : LHS->users()) + if (U != I || !(U->hasOneUser() && *U->users().begin() == I)) + continue; + SCEVExpander Expander(*SE, *DL, "nary-reassociate"); SmallVector Ops1{ BExpr, AExpr }; const SCEVTypes SCEVType = convertToSCEVype(m_MaxMin); diff --git a/llvm/test/Transforms/NaryReassociate/nary-req.ll b/llvm/test/Transforms/NaryReassociate/nary-req.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/NaryReassociate/nary-req.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -nary-reassociate -S | FileCheck %s +; RUN: opt < %s -passes='nary-reassociate' -S | FileCheck %s + +declare i32 @llvm.smax.i32(i32 %a, i32 %b) + +; This is a negative test. We should not optimize if intermediate result +; has a use outside of optimizaple pattern. In other words %smax2 has one +; use from %smax3 and side use from %res2. +define i32 @smax_test1(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @smax_test1( +; CHECK-NEXT: [[C1:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]] +; CHECK-NEXT: [[C2:%.*]] = icmp sgt i32 [[B]], [[C:%.*]] +; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]] +; CHECK-NEXT: [[C3:%.*]] = icmp sgt i32 [[SMAX2]], [[A]] +; CHECK-NEXT: [[SMAX3:%.*]] = select i1 [[C3]], i32 [[SMAX2]], i32 [[A]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[SMAX1]], [[SMAX3]] +; CHECK-NEXT: [[RES2:%.*]] = add i32 [[RES]], [[SMAX2]] +; CHECK-NEXT: ret i32 [[RES]] +; + %c1 = icmp sgt i32 %a, %b + %smax1 = select i1 %c1, i32 %a, i32 %b + %c2 = icmp sgt i32 %b, %c + %smax2 = select i1 %c2, i32 %b, i32 %c + %c3 = icmp sgt i32 %smax2, %a + %smax3 = select i1 %c3, i32 %smax2, i32 %a + %res = add i32 %smax1, %smax3 + %res2 = add i32 %res, %smax2 + ret i32 %res +} +