diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -108,6 +108,8 @@ "Number of geps reassociated and hoisted out of the loop"); STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated " "and hoisted out of the loop"); +STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions " + "reassociated and hoisted out of the loop"); /// Memory promotion is enabled by default. static cl::opt @@ -2674,6 +2676,76 @@ return false; } +/// Try to reassociate expressions like ((A1 * B1) + (A2 * B2) + ...) * C where +/// A1, A2, ... and C are loop invariants into expressions like +/// ((A1 * C * B1) + (A2 * C * B2) + ... and hoist the (A1 * C), (A2 * C), ... +/// invariant expressions. This functions returns true only if any hoisting has +/// actually occured. +static bool hoistFPAssociation(Instruction &I, Loop &L, + ICFLoopSafetyInfo &SafetyInfo, + MemorySSAUpdater &MSSAU, AssumptionCache *AC, + DominatorTree *DT) { + using namespace PatternMatch; + Value *VariantOp = nullptr, *InvariantOp = nullptr; + + if (!match(&I, m_FMul(m_Value(VariantOp), m_Value(InvariantOp))) || + !I.hasAllowReassoc()) + return false; + if (L.isLoopInvariant(VariantOp)) + std::swap(VariantOp, InvariantOp); + if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp)) + return false; + Value *Factor = InvariantOp; + + // First, we need to make sure we should do the transformation. + bool AnyAdds = false; + SmallVector Changes; + for (BinaryOperator *Op = nullptr, *OpNext = nullptr, + *VOp = dyn_cast(VariantOp); + VOp; VOp = OpNext) { + if (!VOp->hasOneUse() || !VOp->hasAllowReassoc()) + return false; + if (!match(VOp, m_FAdd(m_BinOp(Op), m_BinOp(OpNext)))) { + Op = VOp; + OpNext = nullptr; + } else + AnyAdds = true; + if (Op->getOpcode() != Instruction::FMul) { + if (!OpNext) + return false; + if (OpNext->getOpcode() != Instruction::FMul) + return false; + std::swap(Op, OpNext); + } + if (!Op->hasOneUse() || !Op->hasAllowReassoc() || L.isLoopInvariant(Op)) + return false; + Use &U0 = Op->getOperandUse(0); + Use &U1 = Op->getOperandUse(1); + if (L.isLoopInvariant(U0)) + Changes.push_back(&U0); + else if (L.isLoopInvariant(U1)) + Changes.push_back(&U1); + else + return false; + if (Changes.size() > 5U) // A reasonable upper limit + return false; + } + if (Changes.empty() || !AnyAdds) + return false; + + // We know we should do it so let's do the transformation. + auto *Preheader = L.getLoopPreheader(); + assert(Preheader && "Loop is not in simplify form?"); + for (auto *U : Changes) { + assert(L.isLoopInvariant(U->get())); + IRBuilder<> Builder(Preheader->getTerminator()); + U->set(Builder.CreateFMulFMF(U->get(), Factor, &I, "factor.op.fmul")); + } + I.replaceAllUsesWith(VariantOp); + eraseInstruction(I, SafetyInfo, MSSAU); + return true; +} + static bool hoistArithmetics(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, AssumptionCache *AC, @@ -2701,6 +2773,12 @@ return true; } + if (hoistFPAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) { + ++NumHoisted; + ++NumFPAssociationsHoisted; + return true; + } + return false; } diff --git a/llvm/test/Transforms/LICM/expr-reassociate.ll b/llvm/test/Transforms/LICM/expr-reassociate.ll --- a/llvm/test/Transforms/LICM/expr-reassociate.ll +++ b/llvm/test/Transforms/LICM/expr-reassociate.ll @@ -179,6 +179,8 @@ ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_2d_fast ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_AFTER_REASSOCIATE-NEXT: entry: +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D2]], [[DELTA]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL1:%.*]] = fmul fast double [[D1]], [[DELTA]] ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND:%.*]] ; LICM_AFTER_REASSOCIATE: for.cond: ; LICM_AFTER_REASSOCIATE-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] @@ -189,14 +191,13 @@ ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL]] ; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] -; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]] -; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_ADD]], ptr [[ARRAYIDX_J]], align 8 ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND]] ; LICM_AFTER_REASSOCIATE: for.end: ; LICM_AFTER_REASSOCIATE-NEXT: ret void @@ -319,6 +320,9 @@ ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_3d_fast ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_AFTER_REASSOCIATE-NEXT: entry: +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D3]], [[DELTA]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL2:%.*]] = fmul fast double [[D2]], [[DELTA]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL3:%.*]] = fmul fast double [[D1]], [[DELTA]] ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND:%.*]] ; LICM_AFTER_REASSOCIATE: for.cond: ; LICM_AFTER_REASSOCIATE-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] @@ -329,20 +333,19 @@ ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL3]] ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL2]] ; LICM_AFTER_REASSOCIATE-NEXT: [[ADD_J_2:%.*]] = add nuw nsw i32 [[J]], 2 ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J_2:%.*]] = zext i32 [[ADD_J_2]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J_2:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_2]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_3:%.*]] = load double, ptr [[ARRAYIDX_J_2]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_3:%.*]] = fmul fast double [[CELL_3]], [[D3]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_3:%.*]] = fmul fast double [[CELL_3]], [[FACTOR_OP_FMUL]] ; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_ADD1:%.*]] = fadd fast double [[REASS_ADD]], [[FMUL_3]] -; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD1]], [[DELTA]] -; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J_2]], align 8 +; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_ADD1]], ptr [[ARRAYIDX_J_2]], align 8 ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND]] ; LICM_AFTER_REASSOCIATE: for.end: ; LICM_AFTER_REASSOCIATE-NEXT: ret void @@ -540,6 +543,8 @@ ; LICM_ONLY-LABEL: define void @innermost_loop_2d_fast_reassociated ; LICM_ONLY-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_ONLY-NEXT: entry: +; LICM_ONLY-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D2]], [[DELTA]] +; LICM_ONLY-NEXT: [[FACTOR_OP_FMUL1:%.*]] = fmul fast double [[D1]], [[DELTA]] ; LICM_ONLY-NEXT: br label [[FOR_COND:%.*]] ; LICM_ONLY: for.cond: ; LICM_ONLY-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] @@ -550,14 +555,13 @@ ; LICM_ONLY-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 ; LICM_ONLY-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] ; LICM_ONLY-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 -; LICM_ONLY-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_ONLY-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL1]] ; LICM_ONLY-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 ; LICM_ONLY-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] ; LICM_ONLY-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 -; LICM_ONLY-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_ONLY-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL]] ; LICM_ONLY-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] -; LICM_ONLY-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]] -; LICM_ONLY-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY-NEXT: store double [[REASS_ADD]], ptr [[ARRAYIDX_J]], align 8 ; LICM_ONLY-NEXT: br label [[FOR_COND]] ; LICM_ONLY: for.end: ; LICM_ONLY-NEXT: ret void @@ -565,6 +569,8 @@ ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_2d_fast_reassociated ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_AFTER_REASSOCIATE-NEXT: entry: +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D2]], [[DELTA]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL1:%.*]] = fmul fast double [[D1]], [[DELTA]] ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND:%.*]] ; LICM_AFTER_REASSOCIATE: for.cond: ; LICM_AFTER_REASSOCIATE-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] @@ -575,14 +581,13 @@ ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL]] ; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] -; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]] -; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_ADD]], ptr [[ARRAYIDX_J]], align 8 ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND]] ; LICM_AFTER_REASSOCIATE: for.end: ; LICM_AFTER_REASSOCIATE-NEXT: ret void