diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -108,6 +108,8 @@ "Number of geps reassociated and hoisted out of the loop"); STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated " "and hoisted out of the loop"); +STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions " + "reassociated and hoisted out of the loop"); /// Memory promotion is enabled by default. static cl::opt @@ -2674,6 +2676,73 @@ return false; } +/// Try to reassociate expressions like ((A1 * B1) + (A2 * B2) + ...) * C where +/// A1, A2, ... and C are loop invariants into expressions like +/// ((A1 * C * B1) + (A2 * C * B2) + ...) and hoist the (A1 * C), (A2 * C), ... +/// invariant expressions. This functions returns true only if any hoisting has +/// actually occured. +static bool hoistFPAssociation(Instruction &I, Loop &L, + ICFLoopSafetyInfo &SafetyInfo, + MemorySSAUpdater &MSSAU, AssumptionCache *AC, + DominatorTree *DT) { + using namespace PatternMatch; + Value *VariantOp = nullptr, *InvariantOp = nullptr; + + if (!match(&I, m_FMul(m_Value(VariantOp), m_Value(InvariantOp))) || + !I.hasAllowReassoc()) + return false; + if (L.isLoopInvariant(VariantOp)) + std::swap(VariantOp, InvariantOp); + if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp)) + return false; + Value *Factor = InvariantOp; + + // First, we need to make sure we should do the transformation. + SmallVector Changes; + SmallVector Worklist; + if (BinaryOperator *VariantBinOp = dyn_cast(VariantOp)) + Worklist.push_back(VariantBinOp); + while (!Worklist.empty()) { + BinaryOperator *BO = Worklist.pop_back_val(); + if (!BO->hasOneUse() || !BO->hasAllowReassoc()) + return false; + BinaryOperator *Op0, *Op1; + if (match(BO, m_FAdd(m_BinOp(Op0), m_BinOp(Op1)))) { + Worklist.push_back(Op0); + Worklist.push_back(Op1); + continue; + } + if (BO->getOpcode() != Instruction::FMul || L.isLoopInvariant(BO)) + return false; + Use &U0 = BO->getOperandUse(0); + Use &U1 = BO->getOperandUse(1); + if (L.isLoopInvariant(U0)) + Changes.push_back(&U0); + else if (L.isLoopInvariant(U1)) + Changes.push_back(&U1); + else + return false; + if (Changes.size() > 5U) // A reasonable upper limit + return false; + } + if (Changes.empty()) + return false; + + // We know we should do it so let's do the transformation. + auto *Preheader = L.getLoopPreheader(); + assert(Preheader && "Loop is not in simplify form?"); + for (auto *U : Changes) { + assert(L.isLoopInvariant(U->get())); + Instruction *Ins = dyn_cast(U->getUser()); + assert(Ins); + IRBuilder<> Builder(Preheader->getTerminator()); + U->set(Builder.CreateFMulFMF(U->get(), Factor, Ins, "factor.op.fmul")); + } + I.replaceAllUsesWith(VariantOp); + eraseInstruction(I, SafetyInfo, MSSAU); + return true; +} + static bool hoistArithmetics(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, AssumptionCache *AC, @@ -2701,6 +2770,12 @@ return true; } + if (hoistFPAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) { + ++NumHoisted; + ++NumFPAssociationsHoisted; + return true; + } + return false; } diff --git a/llvm/test/Transforms/LICM/expr-reassociate.ll b/llvm/test/Transforms/LICM/expr-reassociate.ll --- a/llvm/test/Transforms/LICM/expr-reassociate.ll +++ b/llvm/test/Transforms/LICM/expr-reassociate.ll @@ -147,6 +147,7 @@ ; LICM_ONLY-LABEL: define void @innermost_loop_1d_shouldhoist_fast ; LICM_ONLY-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_ONLY-NEXT: entry: +; LICM_ONLY-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D1]], [[DELTA]] ; LICM_ONLY-NEXT: br label [[FOR_COND:%.*]] ; LICM_ONLY: for.cond: ; LICM_ONLY-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] @@ -157,11 +158,10 @@ ; LICM_ONLY-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 ; LICM_ONLY-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] ; LICM_ONLY-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 -; LICM_ONLY-NEXT: [[FMUL_1:%.*]] = fmul fast double [[D1]], [[CELL_1]] -; LICM_ONLY-NEXT: [[FMUL_2:%.*]] = fmul fast double [[FMUL_1]], [[DELTA]] +; LICM_ONLY-NEXT: [[FMUL_1:%.*]] = fmul fast double [[FACTOR_OP_FMUL]], [[CELL_1]] ; LICM_ONLY-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 ; LICM_ONLY-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] -; LICM_ONLY-NEXT: store double [[FMUL_2]], ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY-NEXT: store double [[FMUL_1]], ptr [[ARRAYIDX_J]], align 8 ; LICM_ONLY-NEXT: br label [[FOR_COND]] ; LICM_ONLY: for.end: ; LICM_ONLY-NEXT: ret void @@ -287,6 +287,8 @@ ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_2d_fast ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_AFTER_REASSOCIATE-NEXT: entry: +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D1]], [[DELTA]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL1:%.*]] = fmul fast double [[D2]], [[DELTA]] ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND:%.*]] ; LICM_AFTER_REASSOCIATE: for.cond: ; LICM_AFTER_REASSOCIATE-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] @@ -297,14 +299,13 @@ ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL]] ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] -; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]] -; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_ADD]], ptr [[ARRAYIDX_J]], align 8 ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND]] ; LICM_AFTER_REASSOCIATE: for.end: ; LICM_AFTER_REASSOCIATE-NEXT: ret void @@ -427,6 +428,9 @@ ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_3d_fast ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_AFTER_REASSOCIATE-NEXT: entry: +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D3]], [[DELTA]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL2:%.*]] = fmul fast double [[D1]], [[DELTA]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL3:%.*]] = fmul fast double [[D2]], [[DELTA]] ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND:%.*]] ; LICM_AFTER_REASSOCIATE: for.cond: ; LICM_AFTER_REASSOCIATE-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] @@ -437,20 +441,19 @@ ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL2]] ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL3]] ; LICM_AFTER_REASSOCIATE-NEXT: [[ADD_J_2:%.*]] = add nuw nsw i32 [[J]], 2 ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J_2:%.*]] = zext i32 [[ADD_J_2]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J_2:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_2]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_3:%.*]] = load double, ptr [[ARRAYIDX_J_2]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_3:%.*]] = fmul fast double [[CELL_3]], [[D3]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_3:%.*]] = fmul fast double [[CELL_3]], [[FACTOR_OP_FMUL]] ; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_ADD1:%.*]] = fadd fast double [[REASS_ADD]], [[FMUL_3]] -; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD1]], [[DELTA]] -; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J_2]], align 8 +; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_ADD1]], ptr [[ARRAYIDX_J_2]], align 8 ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND]] ; LICM_AFTER_REASSOCIATE: for.end: ; LICM_AFTER_REASSOCIATE-NEXT: ret void @@ -648,6 +651,8 @@ ; LICM_ONLY-LABEL: define void @innermost_loop_2d_fast_reassociated ; LICM_ONLY-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_ONLY-NEXT: entry: +; LICM_ONLY-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D1]], [[DELTA]] +; LICM_ONLY-NEXT: [[FACTOR_OP_FMUL1:%.*]] = fmul fast double [[D2]], [[DELTA]] ; LICM_ONLY-NEXT: br label [[FOR_COND:%.*]] ; LICM_ONLY: for.cond: ; LICM_ONLY-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] @@ -658,14 +663,13 @@ ; LICM_ONLY-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 ; LICM_ONLY-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] ; LICM_ONLY-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 -; LICM_ONLY-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_ONLY-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL]] ; LICM_ONLY-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 ; LICM_ONLY-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] ; LICM_ONLY-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 -; LICM_ONLY-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_ONLY-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL1]] ; LICM_ONLY-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] -; LICM_ONLY-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]] -; LICM_ONLY-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY-NEXT: store double [[REASS_ADD]], ptr [[ARRAYIDX_J]], align 8 ; LICM_ONLY-NEXT: br label [[FOR_COND]] ; LICM_ONLY: for.end: ; LICM_ONLY-NEXT: ret void @@ -673,6 +677,8 @@ ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_2d_fast_reassociated ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_AFTER_REASSOCIATE-NEXT: entry: +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D1]], [[DELTA]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL1:%.*]] = fmul fast double [[D2]], [[DELTA]] ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND:%.*]] ; LICM_AFTER_REASSOCIATE: for.cond: ; LICM_AFTER_REASSOCIATE-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] @@ -683,14 +689,13 @@ ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL]] ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] -; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]] -; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_ADD]], ptr [[ARRAYIDX_J]], align 8 ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND]] ; LICM_AFTER_REASSOCIATE: for.end: ; LICM_AFTER_REASSOCIATE-NEXT: ret void