Index: lib/Transforms/Scalar/NaryReassociate.cpp =================================================================== --- lib/Transforms/Scalar/NaryReassociate.cpp +++ lib/Transforms/Scalar/NaryReassociate.cpp @@ -105,7 +105,9 @@ ScalarEvolution *SE; // A lookup table quickly telling which instructions compute the given SCEV. // Note that there can be multiple instructions at different locations - // computing to the same SCEV. For example, + // computing to the same SCEV, so we map a SCEV to an instruction list. For + // example, + // // if (p1) // foo(a + b); // if (p2) @@ -190,17 +192,21 @@ return nullptr; auto &LHSCandidates = Pos->second; - unsigned NumIterations = 0; - // Search at most 10 items to avoid running quadratically. - static const unsigned MaxNumIterations = 10; - for (auto LHS = LHSCandidates.rbegin(); - LHS != LHSCandidates.rend() && NumIterations < MaxNumIterations; - ++LHS, ++NumIterations) { - if (DT->dominates(*LHS, I)) { - Instruction *NewI = BinaryOperator::CreateAdd(*LHS, RHS, "", I); + // Look for the closest dominator LHS of I that computes LHSExpr, and replace + // I with LHS + RHS. + // + // Because we traverse the dominator tree in the pre-order, a + // candidate that doesn't dominate the current instruction won't dominate any + // future instruction either. Therefore, we pop it out of the stack. This + // optimization makes the algorithm O(n). + while (!LHSCandidates.empty()) { + Instruction *LHS = LHSCandidates.back(); + if (DT->dominates(LHS, I)) { + Instruction *NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I); NewI->takeName(I); return NewI; } + LHSCandidates.pop_back(); } return nullptr; } Index: test/Transforms/NaryReassociate/nary-add.ll =================================================================== --- test/Transforms/NaryReassociate/nary-add.ll +++ test/Transforms/NaryReassociate/nary-add.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -nary-reassociate -S | FileCheck %s +; RUN: opt < %s -nary-reassociate -dce -S | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" @@ -105,6 +105,57 @@ ret void } +; This test involves more conditional reassociation candidates. It exercises +; the stack optimization in tryReassociatedAdd that pops the candidates that +; do not dominate the current instruction. +; +; def1 +; cond1 +; / \ +; / \ +; cond2 use2 +; / \ +; / \ +; def2 def3 +; cond3 +; / \ +; / \ +; def4 use1 +; +; NaryReassociate should match use1 with def3, and use2 with def1. +define void @conditional2(i32 %a, i32 %b, i32 %c, i1 %cond1, i1 %cond2, i1 %cond3) { +entry: + %def1 = add i32 %a, %b + br i1 %cond1, label %bb1, label %bb6 +bb1: + br i1 %cond2, label %bb2, label %bb3 +bb2: + %def2 = add i32 %a, %b + call void @foo(i32 %def2) + ret void +bb3: + %def3 = add i32 %a, %b + br i1 %cond3, label %bb4, label %bb5 +bb4: + %def4 = add i32 %a, %b + call void @foo(i32 %def4) + ret void +bb5: + %0 = add i32 %a, %c + %1 = add i32 %0, %b +; CHECK: [[t1:%[0-9]+]] = add i32 %def3, %c + call void @foo(i32 %1) ; foo((a + c) + b); +; CHECK-NEXT: call void @foo(i32 [[t1]]) + ret void +bb6: + %2 = add i32 %a, %c + %3 = add i32 %2, %b +; CHECK: [[t2:%[0-9]+]] = add i32 %def1, %c + call void @foo(i32 %3) ; foo((a + c) + b); +; CHECK-NEXT: call void @foo(i32 [[t2]]) + ret void +} + ; foo((a + b) + c) ; foo(((a + d) + b) + c) ; =>