diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp --- a/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -31,12 +31,14 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" @@ -58,6 +60,7 @@ #include "llvm/Transforms/Utils/Local.h" #include #include +#include #include using namespace llvm; @@ -1689,6 +1692,85 @@ << '\n'); ++NumFactor; + // This transformation can potentially move a loop invariant computation + // into the loop. This should be avoided as it increases the number of + // computations and has potential to prevent other optimizations that would + // need to know how to revert this transformation in order to succeed. + DominatorTree DT; + DT.recalculate(*(I->getFunction())); + LoopInfoBase LI; + LI.releaseMemory(); + LI.analyze(DT); + + // Go through the operations and see if all the operands belong to the same + // loop. + for (unsigned i = 0; i != Ops.size(); ++i) { + // Only consider expressions we're allowed to transform. + BinaryOperator *BOp = + isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul); + if (!BOp) + continue; + + auto opInALoop = [](LoopInfoBase &LI, + Instruction *I) -> Loop * { + std::function opInLoop = + [&opInLoop](Loop *L, Instruction *I) -> Loop * { + if (L->contains(I)) + return L; + for (auto OneLoop : L->getSubLoops()) { + if (Loop *SubLoop = opInLoop(OneLoop, I)) + return SubLoop; + } + return nullptr; + }; + + for (auto OneLoop : LI) { + if (Loop *L = opInLoop(OneLoop, I)) + return L; + } + return nullptr; + }; + + if (Loop *L = opInALoop(LI, BOp)) { + // Make sure we're not pulling operands from the outside of the loop. + for (Value *Operand : BOp->operands()) { + std::function &, Value *, Loop *)> + isInOneLoop = + [&isInOneLoop, &opInALoop](LoopInfoBase &LI, + Value *V, Loop *L) -> bool { + Instruction *I = dyn_cast(V); + + if (!I) + return true; + if ((opInALoop(LI, I) != L) || (dyn_cast(I))) + return false; + for (Value *Operand : I->operands()) { + if (!isInOneLoop(LI, Operand, L)) + return false; + } + return true; + }; + std::function hasFactor = + [&hasFactor](Value *V, Value *Factor) -> bool { + BinaryOperator *BO = + isReassociableOp(V, Instruction::Mul, Instruction::FMul); + + if (!BO) + return V == Factor; + for (Value *Operand : BO->operands()) { + if (hasFactor(Operand, Factor)) + return true; + } + return false; + }; + + // Only the operands using the factor are being transformed. + if ((hasFactor(Operand, MaxOccVal)) && (!isInOneLoop(LI, Operand, L))) + return nullptr; // Transformation not profitable, giving up. + } + } + } + // Create a new instruction that uses the MaxOccVal twice. If we don't do // this, we could otherwise run into situations where removing a factor // from an expression will drop a use of maxocc, and this can cause diff --git a/llvm/test/Transforms/Reassociate/reassociate-not-from-the-outside-of-the-loop.ll b/llvm/test/Transforms/Reassociate/reassociate-not-from-the-outside-of-the-loop.ll --- a/llvm/test/Transforms/Reassociate/reassociate-not-from-the-outside-of-the-loop.ll +++ b/llvm/test/Transforms/Reassociate/reassociate-not-from-the-outside-of-the-loop.ll @@ -5,16 +5,14 @@ ; computed before the loop anymore. In case of this test, it would add an extra ; multiplication into the loop. -; FIXME: the checks below need to be inverted to confirm the change to the -; Reassociate pass. - define void @innermost_loop(i32 %i, double %d1, double %d2, double %delta, ptr %cells) { ; CHECK-LABEL: @innermost_loop( entry: ; CHECK-LABEL: entry: %mul = fmul fast double %d1, %delta +; CHECK: %{{.*}} = fmul {{.*}} %delta %mul1 = fmul fast double %d2, %delta -; CHECK-NOT: %{{.*}} = fmul {{.*}} %delta +; CHECK: %{{.*}} = fmul {{.*}} %delta br label %for.cond for.cond: @@ -34,8 +32,8 @@ %1 = load double, ptr %arrayidx4 %mul5 = fmul fast double %mul1, %1 %add6 = fadd fast double %mul2, %mul5 -; CHECK: %reass{{.*}} = fadd -; CHECK-NEXT: %reass{{.*}} = fmul {{.*}} %delta +; CHECK-NOT: %reass{{.*}} = fadd +; CHECK-NOT: %reass{{.*}} = fmul store double %add6, ptr %arrayidx4 br label %for.cond