diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -108,6 +108,8 @@
           "Number of geps reassociated and hoisted out of the loop");
 STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated "
                             "and hoisted out of the loop");
+STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions "
+                                    "reassociated and hoisted out of the loop");
 
 /// Memory promotion is enabled by default.
 static cl::opt<bool>
@@ -2669,6 +2671,107 @@
   return false;
 }
 
+/// Reassociate an expression of the form
+///   (A0 * B0 + A1 * B1 + ...) * Factor
+/// where Factor and one operand of each product are loop invariant, by
+/// multiplying that invariant operand with Factor in the preheader. The outer
+/// multiplication \p I then becomes redundant and is erased. Requires at
+/// least two products, reassociation allowed on every instruction involved
+/// and a single use of every inner instruction. Returns true on success.
+static bool hoistFPAssociation(Instruction &I, Loop &L,
+                               ICFLoopSafetyInfo &SafetyInfo,
+                               MemorySSAUpdater &MSSAU, AssumptionCache *AC,
+                               DominatorTree *DT) {
+  using namespace PatternMatch;
+  Value *VariantOp = nullptr, *InvariantOp = nullptr;
+
+  if (!(match(&I, m_FMul(m_Value(VariantOp), m_Value(InvariantOp))) &&
+        I.hasAllowReassoc()))
+    return false;
+  if (L.isLoopInvariant(VariantOp))
+    std::swap(VariantOp, InvariantOp);
+  if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
+    return false;
+  Value *Factor = InvariantOp;
+
+  // First, we need to know if there are enough candidates for the
+  // transformation. Nothing is modified during this scan.
+  unsigned candidates = 0U;
+  for (BinaryOperator *Op = nullptr, *OpNext = nullptr,
+                      *VOp = dyn_cast<BinaryOperator>(VariantOp);
+       VOp; VOp = OpNext) {
+    if (!(VOp->hasOneUse() && VOp->hasAllowReassoc())) {
+      candidates = 0U;
+      break;
+    }
+    // An FAdd yields two sub-expressions; anything else is treated as the
+    // last term of the chain.
+    if (!(match(VOp, m_FAdd(m_BinOp(Op), m_BinOp(OpNext))))) {
+      Op = VOp;
+      OpNext = nullptr;
+    }
+    // Make Op the FMul term of this step; OpNext is what is visited next.
+    Value *Ops[] = {nullptr, nullptr};
+    if (!(match(Op, m_FMul(m_Value(Ops[0]), m_Value(Ops[1]))))) {
+      if (!OpNext) {
+        candidates = 0U;
+        break;
+      }
+      if (!(match(OpNext, m_FMul(m_Value(Ops[0]), m_Value(Ops[1]))))) {
+        candidates = 0U;
+        break;
+      }
+      std::swap(Op, OpNext);
+    }
+    if (!(Op->hasOneUse() && Op->hasAllowReassoc())) {
+      candidates = 0U;
+      break;
+    }
+    // Every term is rewritten below, so every term needs an invariant
+    // operand: folding Factor into a loop-variant operand would create a
+    // preheader instruction that uses a value defined inside the loop.
+    if (!L.isLoopInvariant(Ops[0]) && !L.isLoopInvariant(Ops[1])) {
+      candidates = 0U;
+      break;
+    }
+    candidates++;
+  }
+  if (!(candidates > 1U))
+    return false;
+
+  // We know we have enough candidates, let's do the transformations.
+  auto *Preheader = L.getLoopPreheader();
+  assert(Preheader && "Loop is not in simplify form?");
+  IRBuilder<> Builder(Preheader->getTerminator());
+  for (BinaryOperator *Op = nullptr, *OpNext = nullptr,
+                      *VOp = dyn_cast<BinaryOperator>(VariantOp);
+       VOp; VOp = OpNext) {
+    if (!(match(VOp, m_FAdd(m_BinOp(Op), m_BinOp(OpNext))))) {
+      Op = VOp;
+      OpNext = nullptr;
+    }
+    Value *Ops[] = {nullptr, nullptr};
+    if (!(match(Op, m_FMul(m_Value(Ops[0]), m_Value(Ops[1]))))) {
+      assert(OpNext && "Operation is neither FAdd or FMul!");
+      if (!(match(OpNext, m_FMul(m_Value(Ops[0]), m_Value(Ops[1])))))
+        OpNext = nullptr;
+      std::swap(Op, OpNext);
+    }
+    assert(Op && "Neither current or next operation is FMul!");
+    // Fold Factor into the invariant operand, preferring operand 0.
+    unsigned Idx = L.isLoopInvariant(Ops[0]) ? 0 : 1;
+    assert(L.isLoopInvariant(Ops[Idx]) && "Term has no invariant operand!");
+    Value *Factored =
+        Builder.CreateFMulFMF(Ops[Idx], Factor, &I, "factor.op.fmul");
+    Op->setOperand(Idx, Factored);
+    candidates--;
+  }
+  assert(!candidates);
+  I.replaceAllUsesWith(VariantOp);
+  eraseInstruction(I, SafetyInfo, MSSAU);
+  return true;
+}
+
 static bool hoistArithmetics(Instruction &I, Loop &L,
                              ICFLoopSafetyInfo &SafetyInfo,
                              MemorySSAUpdater &MSSAU, AssumptionCache *AC,
@@ -2696,6 +2799,12 @@
     return true;
   }
 
+  if (hoistFPAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
+    ++NumHoisted;
+    ++NumFPAssociationsHoisted;
+    return true;
+  }
+
   return false;
 }
 
diff --git a/llvm/test/Transforms/LICM/expr-reassociate.ll b/llvm/test/Transforms/LICM/expr-reassociate.ll
--- a/llvm/test/Transforms/LICM/expr-reassociate.ll
+++ b/llvm/test/Transforms/LICM/expr-reassociate.ll
@@ -179,6 +179,8 @@
 ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_2d_fast
 ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
 ; LICM_AFTER_REASSOCIATE-NEXT: entry:
+; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D2]], [[DELTA]]
+; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL1:%.*]] = fmul fast double [[D1]], [[DELTA]]
 ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND:%.*]]
 ; LICM_AFTER_REASSOCIATE: for.cond:
 ; LICM_AFTER_REASSOCIATE-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
@@ -189,14 +191,13 @@
 ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64
 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]]
 ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8
-; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]]
+; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL1]]
 ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]]
 ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8
-; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double
[[CELL_2]], [[D2]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL]] ; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] -; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]] -; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_ADD]], ptr [[ARRAYIDX_J]], align 8 ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND]] ; LICM_AFTER_REASSOCIATE: for.end: ; LICM_AFTER_REASSOCIATE-NEXT: ret void @@ -319,6 +320,9 @@ ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_3d_fast ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_AFTER_REASSOCIATE-NEXT: entry: +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D3]], [[DELTA]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL2:%.*]] = fmul fast double [[D2]], [[DELTA]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL3:%.*]] = fmul fast double [[D1]], [[DELTA]] ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND:%.*]] ; LICM_AFTER_REASSOCIATE: for.cond: ; LICM_AFTER_REASSOCIATE-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] @@ -329,20 +333,19 @@ ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL3]] ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr 
inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL2]] ; LICM_AFTER_REASSOCIATE-NEXT: [[ADD_J_2:%.*]] = add nuw nsw i32 [[J]], 2 ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J_2:%.*]] = zext i32 [[ADD_J_2]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J_2:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_2]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_3:%.*]] = load double, ptr [[ARRAYIDX_J_2]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_3:%.*]] = fmul fast double [[CELL_3]], [[D3]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_3:%.*]] = fmul fast double [[CELL_3]], [[FACTOR_OP_FMUL]] ; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_ADD1:%.*]] = fadd fast double [[REASS_ADD]], [[FMUL_3]] -; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD1]], [[DELTA]] -; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J_2]], align 8 +; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_ADD1]], ptr [[ARRAYIDX_J_2]], align 8 ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND]] ; LICM_AFTER_REASSOCIATE: for.end: ; LICM_AFTER_REASSOCIATE-NEXT: ret void @@ -540,6 +543,8 @@ ; LICM_ONLY-LABEL: define void @innermost_loop_2d_fast_reassociated ; LICM_ONLY-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_ONLY-NEXT: entry: +; LICM_ONLY-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D2]], [[DELTA]] +; LICM_ONLY-NEXT: [[FACTOR_OP_FMUL1:%.*]] = fmul fast double [[D1]], [[DELTA]] ; LICM_ONLY-NEXT: br label [[FOR_COND:%.*]] ; LICM_ONLY: for.cond: ; LICM_ONLY-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], 
[[FOR_BODY:%.*]] ] @@ -550,14 +555,13 @@ ; LICM_ONLY-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 ; LICM_ONLY-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] ; LICM_ONLY-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 -; LICM_ONLY-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_ONLY-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL1]] ; LICM_ONLY-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 ; LICM_ONLY-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] ; LICM_ONLY-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 -; LICM_ONLY-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_ONLY-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL]] ; LICM_ONLY-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] -; LICM_ONLY-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]] -; LICM_ONLY-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY-NEXT: store double [[REASS_ADD]], ptr [[ARRAYIDX_J]], align 8 ; LICM_ONLY-NEXT: br label [[FOR_COND]] ; LICM_ONLY: for.end: ; LICM_ONLY-NEXT: ret void @@ -565,6 +569,8 @@ ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_2d_fast_reassociated ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_AFTER_REASSOCIATE-NEXT: entry: +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D2]], [[DELTA]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL1:%.*]] = fmul fast double [[D1]], [[DELTA]] ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND:%.*]] ; LICM_AFTER_REASSOCIATE: for.cond: ; LICM_AFTER_REASSOCIATE-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] @@ -575,14 +581,13 @@ ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to 
i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL]] ; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] -; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]] -; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_ADD]], ptr [[ARRAYIDX_J]], align 8 ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND]] ; LICM_AFTER_REASSOCIATE: for.end: ; LICM_AFTER_REASSOCIATE-NEXT: ret void