diff --git a/llvm/include/llvm/Transforms/Scalar/Reassociate.h b/llvm/include/llvm/Transforms/Scalar/Reassociate.h
--- a/llvm/include/llvm/Transforms/Scalar/Reassociate.h
+++ b/llvm/include/llvm/Transforms/Scalar/Reassociate.h
@@ -37,6 +37,7 @@
 class Function;
 class Instruction;
 class IRBuilderBase;
+class LoopInfo;
 class Value;
 
 /// A private "module" namespace for types and utilities used by Reassociate.
@@ -96,17 +97,22 @@ public:
 
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
 
+  // Glue for old PM.
+  bool runImpl(Function &F, LoopInfo &LI);
+
 private:
   void BuildRankMap(Function &F, ReversePostOrderTraversal<Function *> &RPOT);
   unsigned getRank(Value *V);
   void canonicalizeOperands(Instruction *I);
-  void ReassociateExpression(BinaryOperator *I);
+  void ReassociateExpression(BinaryOperator *I, LoopInfo &LI);
   void RewriteExprTree(BinaryOperator *I,
                        SmallVectorImpl<reassociate::ValueEntry> &Ops);
   Value *OptimizeExpression(BinaryOperator *I,
-                            SmallVectorImpl<reassociate::ValueEntry> &Ops);
+                            SmallVectorImpl<reassociate::ValueEntry> &Ops,
+                            LoopInfo &LI);
   Value *OptimizeAdd(Instruction *I,
-                     SmallVectorImpl<reassociate::ValueEntry> &Ops);
+                     SmallVectorImpl<reassociate::ValueEntry> &Ops,
+                     LoopInfo &LI);
   Value *OptimizeXor(Instruction *I,
                      SmallVectorImpl<reassociate::ValueEntry> &Ops);
   bool CombineXorOpnd(Instruction *I, reassociate::XorOpnd *Opnd1,
@@ -121,7 +127,7 @@
   Value *RemoveFactorFromExpression(Value *V, Value *Factor);
   void EraseInst(Instruction *I);
   void RecursivelyEraseDeadInsts(Instruction *I, OrderedSet &Insts);
-  void OptimizeInst(Instruction *I);
+  void OptimizeInst(Instruction *I, LoopInfo &LI);
   Instruction *canonicalizeNegFPConstantsForOp(Instruction *I, Instruction *Op,
                                                Value *OtherOp);
   Instruction *canonicalizeNegFPConstants(Instruction *I);
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/BasicBlock.h"
@@ -58,6 +59,7 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 #include <cassert>
+#include <functional>
 #include <utility>
 
 using namespace llvm;
@@ -1533,7 +1535,8 @@
 /// optimizes based on identities. If it can be reduced to a single Value, it
 /// is returned, otherwise the Ops list is mutated as necessary.
 Value *ReassociatePass::OptimizeAdd(Instruction *I,
-                                    SmallVectorImpl<ValueEntry> &Ops) {
+                                    SmallVectorImpl<ValueEntry> &Ops,
+                                    LoopInfo &LI) {
   // Scan the operand lists looking for X and -X pairs. If we find any, we
   // can simplify expressions like X+-X == 0 and X+~X ==-1. While we're at it,
   // scan for any
@@ -1689,6 +1692,58 @@
                       << '\n');
     ++NumFactor;
 
+    // This transformation can potentially move a loop invariant computation
+    // into the loop. This should be avoided as it increases the number of
+    // computations and has potential to prevent other optimizations that would
+    // need to know how to revert this transformation in order to succeed.
+
+    // Go through the operations and see if all the operands belong to the same
+    // loop.
+    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+      // Only consider expressions we're allowed to transform.
+      BinaryOperator *BOp =
+          isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
+      if (!BOp)
+        continue;
+
+      if (Loop *L = LI.getLoopFor(BOp->getParent())) {
+        // Make sure we're not pulling operands from the outside of the loop.
+        for (Value *Operand : BOp->operands()) {
+          std::function<bool(LoopInfo &, Value *, Loop *)> isInOneLoop =
+              [&isInOneLoop](LoopInfo &LI, Value *V, Loop *L) -> bool {
+            Instruction *I = dyn_cast<Instruction>(V);
+
+            if (!I)
+              return true;
+            if ((LI.getLoopFor(I->getParent()) != L) || (dyn_cast<PHINode>(I)))
+              return false;
+            for (Value *Operand : I->operands()) {
+              if (!isInOneLoop(LI, Operand, L))
+                return false;
+            }
+            return true;
+          };
+          std::function<bool(Value *, Value *)> hasFactor =
+              [&hasFactor](Value *V, Value *Factor) -> bool {
+            BinaryOperator *BO =
+                isReassociableOp(V, Instruction::Mul, Instruction::FMul);
+
+            if (!BO)
+              return V == Factor;
+            for (Value *Operand : BO->operands()) {
+              if (hasFactor(Operand, Factor))
+                return true;
+            }
+            return false;
+          };
+
+          // Only the operands using the factor are being transformed.
+          if ((hasFactor(Operand, MaxOccVal)) && (!isInOneLoop(LI, Operand, L)))
+            return nullptr; // Transformation not profitable, giving up.
+        }
+      }
+    }
+
     // Create a new instruction that uses the MaxOccVal twice. If we don't do
     // this, we could otherwise run into situations where removing a factor
     // from an expression will drop a use of maxocc, and this can cause
@@ -1931,7 +1986,8 @@
 }
 
 Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
-                                           SmallVectorImpl<ValueEntry> &Ops) {
+                                           SmallVectorImpl<ValueEntry> &Ops,
+                                           LoopInfo &LI) {
   // Now that we have the linearized expression tree, try to optimize it.
   // Start by folding any constants that we found.
   const DataLayout &DL = I->getModule()->getDataLayout();
@@ -1985,7 +2041,7 @@
 
   case Instruction::Add:
   case Instruction::FAdd:
-    if (Value *Result = OptimizeAdd(I, Ops))
+    if (Value *Result = OptimizeAdd(I, Ops, LI))
       return Result;
     break;
 
@@ -1997,7 +2053,7 @@
   }
 
   if (Ops.size() != NumOps)
-    return OptimizeExpression(I, Ops);
+    return OptimizeExpression(I, Ops, LI);
   return nullptr;
 }
 
@@ -2182,7 +2238,7 @@
 
 /// Inspect and optimize the given instruction. Note that erasing
 /// instructions is not allowed.
-void ReassociatePass::OptimizeInst(Instruction *I) {
+void ReassociatePass::OptimizeInst(Instruction *I, LoopInfo &LI) {
   // Only consider operations that we understand.
   if (!isa<UnaryOperator>(I) && !isa<BinaryOperator>(I))
     return;
@@ -2318,10 +2374,10 @@
       cast<Instruction>(BO->user_back())->getOpcode() == Instruction::FSub)
     return;
 
-  ReassociateExpression(BO);
+  ReassociateExpression(BO, LI);
 }
 
-void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
+void ReassociatePass::ReassociateExpression(BinaryOperator *I, LoopInfo &LI) {
   // First, walk the expression tree, linearizing the tree, collecting the
   // operand information.
   SmallVector<RepeatedValue, 8> Tree;
@@ -2343,7 +2399,7 @@
 
   // Now that we have the expression tree in a convenient
   // sorted form, optimize it globally if possible.
-  if (Value *V = OptimizeExpression(I, Ops)) {
+  if (Value *V = OptimizeExpression(I, Ops, LI)) {
     if (V == I)
       // Self-referential expression in unreachable code.
       return;
@@ -2507,7 +2563,7 @@
   }
 }
 
-PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
+bool ReassociatePass::runImpl(Function &F, LoopInfo &LI) {
   // Get the functions basic blocks in Reverse Post Order. This order is used by
   // BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
   // blocks (it has been seen that the analysis in this pass could hang when
@@ -2538,7 +2594,7 @@
       if (isInstructionTriviallyDead(&*II)) {
         EraseInst(&*II++);
       } else {
-        OptimizeInst(&*II);
+        OptimizeInst(&*II, LI);
         assert(II->getParent() == &*BI && "Moved to a different block!");
         ++II;
       }
@@ -2566,7 +2622,7 @@
       if (isInstructionTriviallyDead(I))
         EraseInst(I);
       else
-        OptimizeInst(I);
+        OptimizeInst(I, LI);
     }
   }
 
@@ -2576,13 +2632,19 @@
   for (auto &Entry : PairMap)
     Entry.clear();
 
-  if (MadeChange) {
-    PreservedAnalyses PA;
-    PA.preserveSet<CFGAnalyses>();
-    return PA;
-  }
+  return MadeChange;
+}
+
+PreservedAnalyses ReassociatePass::run(Function &F,
+                                       FunctionAnalysisManager &AM) {
+  auto &LI = AM.getResult<LoopAnalysis>(F);
 
-  return PreservedAnalyses::all();
+  if (!runImpl(F, LI))
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
 }
 
 namespace {
@@ -2600,10 +2662,7 @@
   bool runOnFunction(Function &F) override {
     if (skipFunction(F))
      return false;
-
-    FunctionAnalysisManager DummyFAM;
-    auto PA = Impl.run(F, DummyFAM);
-    return !PA.areAllPreserved();
+    return Impl.runImpl(F, getAnalysis<LoopInfoWrapperPass>().getLoopInfo());
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -2611,6 +2670,7 @@
     AU.addPreserved<AAResultsWrapperPass>();
     AU.addPreserved<BasicAAWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addPreserved<LoopInfoWrapperPass>();
   }
 };
 
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -162,8 +162,8 @@
 ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: ReassociatePass
-; CHECK-O-NEXT: Running pass: LoopSimplifyPass
 ; CHECK-O-NEXT: Running analysis: LoopAnalysis
+; CHECK-O-NEXT: Running pass: LoopSimplifyPass
 ; CHECK-O-NEXT: Running pass: LCSSAPass
 ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
--- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
@@ -99,8 +99,8 @@
 ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: ReassociatePass
-; CHECK-O-NEXT: Running pass: LoopSimplifyPass
 ; CHECK-O-NEXT: Running analysis: LoopAnalysis
+; CHECK-O-NEXT: Running pass: LoopSimplifyPass
 ; CHECK-O-NEXT: Running pass: LCSSAPass
 ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
--- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
@@ -107,8 +107,8 @@
 ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: ReassociatePass
-; CHECK-O-NEXT: Running pass: LoopSimplifyPass
 ; CHECK-O-NEXT: Running analysis: LoopAnalysis
+; CHECK-O-NEXT: Running pass: LoopSimplifyPass
 ; CHECK-O-NEXT: Running pass: LCSSAPass
 ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
diff --git a/llvm/test/Transforms/Reassociate/reassociate-not-from-the-outside-of-the-loop.ll b/llvm/test/Transforms/Reassociate/reassociate-not-from-the-outside-of-the-loop.ll
--- a/llvm/test/Transforms/Reassociate/reassociate-not-from-the-outside-of-the-loop.ll
+++ b/llvm/test/Transforms/Reassociate/reassociate-not-from-the-outside-of-the-loop.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
 ; RUN: opt -passes=reassociate -S < %s | FileCheck %s
 
 ; This test is to ensure that no computations are pulled into a loop
@@ -9,12 +10,35 @@
 ; Reassociate pass.
 
 define void @innermost_loop(i32 %i, double %d1, double %d2, double %delta, ptr %cells) {
-; CHECK-LABEL: @innermost_loop(
+; CHECK-LABEL: define void @innermost_loop
+; CHECK-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[DELTA]], [[D1]]
+; CHECK-NEXT:    [[MUL1:%.*]] = fmul fast double [[DELTA]], [[D2]]
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[J_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp sgt i32 [[J_0]], [[I]]
+; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[J_0]], 1
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[ADD]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[MUL2:%.*]] = fmul fast double [[MUL]], [[TMP0]]
+; CHECK-NEXT:    [[IDXPROM3:%.*]] = zext i32 [[J_0]] to i64
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load double, ptr [[ARRAYIDX4]], align 8
+; CHECK-NEXT:    [[MUL5:%.*]] = fmul fast double [[MUL1]], [[TMP1]]
+; CHECK-NEXT:    [[ADD6:%.*]] = fadd fast double [[MUL5]], [[MUL2]]
+; CHECK-NEXT:    store double [[ADD6]], ptr [[ARRAYIDX4]], align 8
+; CHECK-NEXT:    br label [[FOR_COND]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
 entry:
-; CHECK-LABEL: entry:
   %mul = fmul fast double %d1, %delta
   %mul1 = fmul fast double %d2, %delta
-; CHECK-NOT: %{{.*}} = fmul {{.*}} %delta
   br label %for.cond
 
 for.cond:
@@ -23,7 +47,6 @@
   br i1 %cmp.not, label %for.end, label %for.body
 
 for.body:
-; CHECK-LABEL: for.body:
   %add = add nuw nsw i32 %j.0, 1
   %idxprom = zext i32 %add to i64
   %arrayidx = getelementptr inbounds double, ptr %cells, i64 %idxprom
@@ -34,8 +57,6 @@
   %1 = load double, ptr %arrayidx4
   %mul5 = fmul fast double %mul1, %1
   %add6 = fadd fast double %mul2, %mul5
-; CHECK: %reass{{.*}} = fadd
-; CHECK-NEXT: %reass{{.*}} = fmul {{.*}} %delta
   store double %add6, ptr %arrayidx4
   br label %for.cond