[NaryReassociate] Skip min/max reassociation when LHS has side uses

The previous check ran `continue` inside the loop over LHS's users, so it
only advanced that inner loop and never rejected the candidate; its
condition also used `||` where `&&` was intended. Replace it with a single
any_of predicate: the candidate is skipped as soon as some user of LHS is
neither I itself nor an instruction whose sole user is I.

A dedicated hasOneUser() fast path is not needed: whenever I is itself a
user of LHS (presumably always, since LHS is passed as an operand of I --
confirm against the caller) the general predicate yields the same answer.

The new test covers a side use that only becomes relevant after the first
reassociation has already rewritten %e10.

diff --git a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
--- a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -612,9 +612,11 @@
 
       // The optimization is profitable only if LHS can be removed in the end.
       // In other words LHS should be used (directly or indirectly) by I only.
-      for (User *U : LHS->users())
-        if (U != I || !(U->hasOneUser() && *U->users().begin() == I))
-          continue;
+      // A user other than I is tolerated only if I is that user's sole user.
+      if (llvm::any_of(LHS->users(), [&](User *U) {
+            return U != I && !(U->hasOneUser() && *U->users().begin() == I);
+          }))
+        continue;
 
       SCEVExpander Expander(*SE, *DL, "nary-reassociate");
       SmallVector Ops1{ BExpr, AExpr };
diff --git a/llvm/test/Transforms/NaryReassociate/nary-req.ll b/llvm/test/Transforms/NaryReassociate/nary-req.ll
--- a/llvm/test/Transforms/NaryReassociate/nary-req.ll
+++ b/llvm/test/Transforms/NaryReassociate/nary-req.ll
@@ -3,9 +3,10 @@
 ; RUN: opt < %s -passes='nary-reassociate' -S | FileCheck %s
 
 declare i32 @llvm.smax.i32(i32 %a, i32 %b)
+declare i64 @llvm.umin.i64(i64, i64)
 
 ; This is a negative test. We should not optimize if intermediate result
-; has a use outside of optimizaple pattern. In other words %smax2 has one
+; has a use outside of optimizable pattern. In other words %smax2 has one
 ; use from %smax3 and side use from %res2.
 define i32 @smax_test1(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: @smax_test1(
@@ -30,3 +31,36 @@
   ret i32 %res
 }
 
+; This is a negative test. It is similar to the previous one
+; but a bit more complex. In particular, after the first iteration
+; %e10 is replaced with %e10.nary = call i64 @llvm.umin.i64(i64 %e5, i64 %e).
+; No more reassociation should be applied to %e10.nary since
+; %e5 has a side use in %e6.
+define void @test2(i64 %arg) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[E:%.*]] = sub i64 undef, 0
+; CHECK-NEXT:    [[E1:%.*]] = sub i64 [[ARG:%.*]], 0
+; CHECK-NEXT:    [[E2:%.*]] = call i64 @llvm.umin.i64(i64 [[E]], i64 [[E1]])
+; CHECK-NEXT:    [[E3:%.*]] = call i64 @llvm.umin.i64(i64 [[E2]], i64 16384)
+; CHECK-NEXT:    [[E4:%.*]] = sub i64 [[ARG]], 0
+; CHECK-NEXT:    [[E5:%.*]] = call i64 @llvm.umin.i64(i64 [[E4]], i64 16384)
+; CHECK-NEXT:    [[E6:%.*]] = icmp ugt i64 [[E5]], 0
+; CHECK-NEXT:    [[E10_NARY:%.*]] = call i64 @llvm.umin.i64(i64 [[E5]], i64 [[E]])
+; CHECK-NEXT:    unreachable
+;
+bb:
+  %e = sub i64 undef, 0
+  %e1 = sub i64 %arg, 0
+  %e2 = call i64 @llvm.umin.i64(i64 %e, i64 %e1)
+  %e3 = call i64 @llvm.umin.i64(i64 %e2, i64 16384)
+  %e4 = sub i64 %arg, 0
+  %e5 = call i64 @llvm.umin.i64(i64 %e4, i64 16384)
+  %e6 = icmp ugt i64 %e5, 0
+  %e7 = sub i64 undef, 0
+  %e8 = sub i64 %arg, 0
+  %e9 = call i64 @llvm.umin.i64(i64 %e7, i64 %e8)
+  %e10 = call i64 @llvm.umin.i64(i64 %e9, i64 16384)
+  unreachable
+}
+