diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -108,6 +108,8 @@ "Number of geps reassociated and hoisted out of the loop"); STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated " "and hoisted out of the loop"); +STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions " + "reassociated and hoisted out of the loop"); /// Memory promotion is enabled by default. static cl::opt @@ -127,6 +129,12 @@ cl::desc("Max num uses visited for identifying load " "invariance in loop using invariant start (default = 8)")); +cl::opt FPAssociationUpperLimit( + "licm-max-num-fp-reassociations", cl::init(5U), cl::Hidden, + cl::desc( + "Set upper limit for the number of transformations performed " + "during a single round of hoisting the reassociated expressions.")); + // Experimental option to allow imprecision in LICM in pathological cases, in // exchange for faster compile. This is to be removed if MemorySSA starts to // address the same issue. LICM calls MemorySSAWalker's @@ -2674,6 +2682,72 @@ return false; } +/// Try to reassociate expressions like ((A1 * B1) + (A2 * B2) + ...) * C where +/// A1, A2, ... and C are loop invariants into expressions like +/// ((A1 * C * B1) + (A2 * C * B2) + ...) and hoist the (A1 * C), (A2 * C), ... +/// invariant expressions. This functions returns true only if any hoisting has +/// actually occured. +static bool hoistFPAssociation(Instruction &I, Loop &L, + ICFLoopSafetyInfo &SafetyInfo, + MemorySSAUpdater &MSSAU, AssumptionCache *AC, + DominatorTree *DT) { + using namespace PatternMatch; + Value *VariantOp = nullptr, *InvariantOp = nullptr; + + if (!match(&I, m_FMul(m_Value(VariantOp), m_Value(InvariantOp))) || + !I.hasAllowReassoc()) + return false; + if (L.isLoopInvariant(VariantOp)) + std::swap(VariantOp, InvariantOp); + if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp)) + return false; + Value *Factor = InvariantOp; + + // First, we need to make sure we should do the transformation. + SmallVector Changes; + SmallVector Worklist; + if (BinaryOperator *VariantBinOp = dyn_cast(VariantOp)) + Worklist.push_back(VariantBinOp); + while (!Worklist.empty()) { + BinaryOperator *BO = Worklist.pop_back_val(); + if (!BO->hasOneUse() || !BO->hasAllowReassoc()) + return false; + BinaryOperator *Op0, *Op1; + if (match(BO, m_FAdd(m_BinOp(Op0), m_BinOp(Op1)))) { + Worklist.push_back(Op0); + Worklist.push_back(Op1); + continue; + } + if (BO->getOpcode() != Instruction::FMul || L.isLoopInvariant(BO)) + return false; + Use &U0 = BO->getOperandUse(0); + Use &U1 = BO->getOperandUse(1); + if (L.isLoopInvariant(U0)) + Changes.push_back(&U0); + else if (L.isLoopInvariant(U1)) + Changes.push_back(&U1); + else + return false; + if (Changes.size() > FPAssociationUpperLimit) + return false; + } + if (Changes.empty()) + return false; + + // We know we should do it so let's do the transformation. + auto *Preheader = L.getLoopPreheader(); + assert(Preheader && "Loop is not in simplify form?"); + IRBuilder<> Builder(Preheader->getTerminator()); + for (auto *U : Changes) { + assert(L.isLoopInvariant(U->get())); + Instruction *Ins = cast(U->getUser()); + U->set(Builder.CreateFMulFMF(U->get(), Factor, Ins, "factor.op.fmul")); + } + I.replaceAllUsesWith(VariantOp); + eraseInstruction(I, SafetyInfo, MSSAU); + return true; +} + static bool hoistArithmetics(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, AssumptionCache *AC, @@ -2701,6 +2775,12 @@ return true; } + if (hoistFPAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) { + ++NumHoisted; + ++NumFPAssociationsHoisted; + return true; + } + return false; } diff --git a/llvm/test/Transforms/LICM/expr-reassociate.ll b/llvm/test/Transforms/LICM/expr-reassociate.ll --- a/llvm/test/Transforms/LICM/expr-reassociate.ll +++ b/llvm/test/Transforms/LICM/expr-reassociate.ll @@ -1,7 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -passes='reassociate' -S < %s | FileCheck %s --check-prefix=REASSOCIATE_ONLY ; RUN: opt -passes='licm' -S < %s | FileCheck %s --check-prefix=LICM_ONLY +; RUN: opt -passes='licm' -licm-max-num-fp-reassociations=1 -S < %s | FileCheck %s --check-prefix=LICM_ONLY_CONSTRAINED ; RUN: opt -passes='reassociate,loop-mssa(licm)' -S < %s | FileCheck %s --check-prefix=LICM_AFTER_REASSOCIATE +; RUN: opt -passes='reassociate,loop-mssa(licm)' -licm-max-num-fp-reassociations=1 -S < %s | FileCheck %s --check-prefix=LICM_AFTER_REASSOCIATE_CONSTRAINED ; ; A simple loop, should not get modified: @@ -58,6 +60,28 @@ ; LICM_ONLY: for.end: ; LICM_ONLY-NEXT: ret void ; +; LICM_ONLY_CONSTRAINED-LABEL: define void @innermost_loop_1d_fast +; LICM_ONLY_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { +; LICM_ONLY_CONSTRAINED-NEXT: entry: +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_D1:%.*]] = fmul fast double [[D1]], [[DELTA]] +; LICM_ONLY_CONSTRAINED-NEXT: br label [[FOR_COND:%.*]] +; LICM_ONLY_CONSTRAINED: for.cond: +; LICM_ONLY_CONSTRAINED-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] +; LICM_ONLY_CONSTRAINED-NEXT: [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]] +; LICM_ONLY_CONSTRAINED-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; LICM_ONLY_CONSTRAINED: for.body: +; LICM_ONLY_CONSTRAINED-NEXT: [[ADD_J_1]] = add nuw nsw i32 [[J]], 1 +; LICM_ONLY_CONSTRAINED-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 +; LICM_ONLY_CONSTRAINED-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_1:%.*]] = fmul fast double [[FMUL_D1]], [[CELL_1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 +; LICM_ONLY_CONSTRAINED-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] +; LICM_ONLY_CONSTRAINED-NEXT: store double [[FMUL_1]], ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: br label [[FOR_COND]] +; LICM_ONLY_CONSTRAINED: for.end: +; LICM_ONLY_CONSTRAINED-NEXT: ret void +; ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_1d_fast ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_AFTER_REASSOCIATE-NEXT: entry: @@ -80,6 +104,28 @@ ; LICM_AFTER_REASSOCIATE: for.end: ; LICM_AFTER_REASSOCIATE-NEXT: ret void ; +; LICM_AFTER_REASSOCIATE_CONSTRAINED-LABEL: define void @innermost_loop_1d_fast +; LICM_AFTER_REASSOCIATE_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: entry: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_D1:%.*]] = fmul fast double [[DELTA]], [[D1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br label [[FOR_COND:%.*]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.cond: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.body: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ADD_J_1]] = add nuw nsw i32 [[J]], 1 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_1:%.*]] = fmul fast double [[FMUL_D1]], [[CELL_1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: store double [[FMUL_1]], ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br label [[FOR_COND]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.end: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: ret void +; entry: %fmul.d1 = fmul fast double %d1, %delta br label %for.cond @@ -147,6 +193,7 @@ ; LICM_ONLY-LABEL: define void @innermost_loop_1d_shouldhoist_fast ; LICM_ONLY-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_ONLY-NEXT: entry: +; LICM_ONLY-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D1]], [[DELTA]] ; LICM_ONLY-NEXT: br label [[FOR_COND:%.*]] ; LICM_ONLY: for.cond: ; LICM_ONLY-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] @@ -157,15 +204,36 @@ ; LICM_ONLY-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 ; LICM_ONLY-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] ; LICM_ONLY-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 -; LICM_ONLY-NEXT: [[FMUL_1:%.*]] = fmul fast double [[D1]], [[CELL_1]] -; LICM_ONLY-NEXT: [[FMUL_2:%.*]] = fmul fast double [[FMUL_1]], [[DELTA]] +; LICM_ONLY-NEXT: [[FMUL_1:%.*]] = fmul fast double [[FACTOR_OP_FMUL]], [[CELL_1]] ; LICM_ONLY-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 ; LICM_ONLY-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] -; LICM_ONLY-NEXT: store double [[FMUL_2]], ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY-NEXT: store double [[FMUL_1]], ptr [[ARRAYIDX_J]], align 8 ; LICM_ONLY-NEXT: br label [[FOR_COND]] ; LICM_ONLY: for.end: ; LICM_ONLY-NEXT: ret void ; +; LICM_ONLY_CONSTRAINED-LABEL: define void @innermost_loop_1d_shouldhoist_fast +; LICM_ONLY_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { +; LICM_ONLY_CONSTRAINED-NEXT: entry: +; LICM_ONLY_CONSTRAINED-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D1]], [[DELTA]] +; LICM_ONLY_CONSTRAINED-NEXT: br label [[FOR_COND:%.*]] +; LICM_ONLY_CONSTRAINED: for.cond: +; LICM_ONLY_CONSTRAINED-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] +; LICM_ONLY_CONSTRAINED-NEXT: [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]] +; LICM_ONLY_CONSTRAINED-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; LICM_ONLY_CONSTRAINED: for.body: +; LICM_ONLY_CONSTRAINED-NEXT: [[ADD_J_1]] = add nuw nsw i32 [[J]], 1 +; LICM_ONLY_CONSTRAINED-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 +; LICM_ONLY_CONSTRAINED-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_1:%.*]] = fmul fast double [[FACTOR_OP_FMUL]], [[CELL_1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 +; LICM_ONLY_CONSTRAINED-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] +; LICM_ONLY_CONSTRAINED-NEXT: store double [[FMUL_1]], ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: br label [[FOR_COND]] +; LICM_ONLY_CONSTRAINED: for.end: +; LICM_ONLY_CONSTRAINED-NEXT: ret void +; ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_1d_shouldhoist_fast ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_AFTER_REASSOCIATE-NEXT: entry: @@ -188,6 +256,28 @@ ; LICM_AFTER_REASSOCIATE: for.end: ; LICM_AFTER_REASSOCIATE-NEXT: ret void ; +; LICM_AFTER_REASSOCIATE_CONSTRAINED-LABEL: define void @innermost_loop_1d_shouldhoist_fast +; LICM_AFTER_REASSOCIATE_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: entry: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_1:%.*]] = fmul fast double [[DELTA]], [[D1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br label [[FOR_COND:%.*]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.cond: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.body: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ADD_J_1]] = add nuw nsw i32 [[J]], 1 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_2:%.*]] = fmul fast double [[FMUL_1]], [[CELL_1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: store double [[FMUL_2]], ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br label [[FOR_COND]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.end: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: ret void +; entry: br label %for.cond @@ -284,9 +374,37 @@ ; LICM_ONLY: for.end: ; LICM_ONLY-NEXT: ret void ; +; LICM_ONLY_CONSTRAINED-LABEL: define void @innermost_loop_2d_fast +; LICM_ONLY_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { +; LICM_ONLY_CONSTRAINED-NEXT: entry: +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_D1:%.*]] = fmul fast double [[D1]], [[DELTA]] +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_D2:%.*]] = fmul fast double [[D2]], [[DELTA]] +; LICM_ONLY_CONSTRAINED-NEXT: br label [[FOR_COND:%.*]] +; LICM_ONLY_CONSTRAINED: for.cond: +; LICM_ONLY_CONSTRAINED-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] +; LICM_ONLY_CONSTRAINED-NEXT: [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]] +; LICM_ONLY_CONSTRAINED-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; LICM_ONLY_CONSTRAINED: for.body: +; LICM_ONLY_CONSTRAINED-NEXT: [[ADD_J_1]] = add nuw nsw i32 [[J]], 1 +; LICM_ONLY_CONSTRAINED-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 +; LICM_ONLY_CONSTRAINED-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_1:%.*]] = fmul fast double [[FMUL_D1]], [[CELL_1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 +; LICM_ONLY_CONSTRAINED-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] +; LICM_ONLY_CONSTRAINED-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_2:%.*]] = fmul fast double [[FMUL_D2]], [[CELL_2]] +; LICM_ONLY_CONSTRAINED-NEXT: [[FADD_1:%.*]] = fadd fast double [[FMUL_1]], [[FMUL_2]] +; LICM_ONLY_CONSTRAINED-NEXT: store double [[FADD_1]], ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: br label [[FOR_COND]] +; LICM_ONLY_CONSTRAINED: for.end: +; LICM_ONLY_CONSTRAINED-NEXT: ret void +; ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_2d_fast ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_AFTER_REASSOCIATE-NEXT: entry: +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D1]], [[DELTA]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL1:%.*]] = fmul fast double [[D2]], [[DELTA]] ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND:%.*]] ; LICM_AFTER_REASSOCIATE: for.cond: ; LICM_AFTER_REASSOCIATE-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] @@ -297,18 +415,42 @@ ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL]] ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] -; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]] -; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_ADD]], ptr [[ARRAYIDX_J]], align 8 ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND]] ; LICM_AFTER_REASSOCIATE: for.end: ; LICM_AFTER_REASSOCIATE-NEXT: ret void ; +; LICM_AFTER_REASSOCIATE_CONSTRAINED-LABEL: define void @innermost_loop_2d_fast +; LICM_AFTER_REASSOCIATE_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: entry: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br label [[FOR_COND:%.*]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.cond: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.body: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ADD_J_1]] = add nuw nsw i32 [[J]], 1 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br label [[FOR_COND]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.end: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: ret void +; entry: %fmul.d1 = fmul fast double %d1, %delta %fmul.d2 = fmul fast double %d2, %delta @@ -424,9 +566,45 @@ ; LICM_ONLY: for.end: ; LICM_ONLY-NEXT: ret void ; +; LICM_ONLY_CONSTRAINED-LABEL: define void @innermost_loop_3d_fast +; LICM_ONLY_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { +; LICM_ONLY_CONSTRAINED-NEXT: entry: +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_D1:%.*]] = fmul fast double [[D1]], [[DELTA]] +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_D2:%.*]] = fmul fast double [[D2]], [[DELTA]] +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_D3:%.*]] = fmul fast double [[D3]], [[DELTA]] +; LICM_ONLY_CONSTRAINED-NEXT: br label [[FOR_COND:%.*]] +; LICM_ONLY_CONSTRAINED: for.cond: +; LICM_ONLY_CONSTRAINED-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] +; LICM_ONLY_CONSTRAINED-NEXT: [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]] +; LICM_ONLY_CONSTRAINED-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; LICM_ONLY_CONSTRAINED: for.body: +; LICM_ONLY_CONSTRAINED-NEXT: [[ADD_J_1]] = add nuw nsw i32 [[J]], 1 +; LICM_ONLY_CONSTRAINED-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 +; LICM_ONLY_CONSTRAINED-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_1:%.*]] = fmul fast double [[FMUL_D1]], [[CELL_1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 +; LICM_ONLY_CONSTRAINED-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] +; LICM_ONLY_CONSTRAINED-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_2:%.*]] = fmul fast double [[FMUL_D2]], [[CELL_2]] +; LICM_ONLY_CONSTRAINED-NEXT: [[FADD_1:%.*]] = fadd fast double [[FMUL_1]], [[FMUL_2]] +; LICM_ONLY_CONSTRAINED-NEXT: [[ADD_J_2:%.*]] = add nuw nsw i32 [[J]], 2 +; LICM_ONLY_CONSTRAINED-NEXT: [[IDXPROM_J_2:%.*]] = zext i32 [[ADD_J_2]] to i64 +; LICM_ONLY_CONSTRAINED-NEXT: [[ARRAYIDX_J_2:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_2]] +; LICM_ONLY_CONSTRAINED-NEXT: [[CELL_3:%.*]] = load double, ptr [[ARRAYIDX_J_2]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_3:%.*]] = fmul fast double [[FMUL_D3]], [[CELL_3]] +; LICM_ONLY_CONSTRAINED-NEXT: [[FADD_2:%.*]] = fadd fast double [[FADD_1]], [[FMUL_3]] +; LICM_ONLY_CONSTRAINED-NEXT: store double [[FADD_2]], ptr [[ARRAYIDX_J_2]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: br label [[FOR_COND]] +; LICM_ONLY_CONSTRAINED: for.end: +; LICM_ONLY_CONSTRAINED-NEXT: ret void +; ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_3d_fast ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_AFTER_REASSOCIATE-NEXT: entry: +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D3]], [[DELTA]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL2:%.*]] = fmul fast double [[D1]], [[DELTA]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL3:%.*]] = fmul fast double [[D2]], [[DELTA]] ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND:%.*]] ; LICM_AFTER_REASSOCIATE: for.cond: ; LICM_AFTER_REASSOCIATE-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] @@ -437,24 +615,54 @@ ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL2]] ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL3]] ; LICM_AFTER_REASSOCIATE-NEXT: [[ADD_J_2:%.*]] = add nuw nsw i32 [[J]], 2 ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J_2:%.*]] = zext i32 [[ADD_J_2]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J_2:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_2]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_3:%.*]] = load double, ptr [[ARRAYIDX_J_2]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_3:%.*]] = fmul fast double [[CELL_3]], [[D3]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_3:%.*]] = fmul fast double [[CELL_3]], [[FACTOR_OP_FMUL]] ; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_ADD1:%.*]] = fadd fast double [[REASS_ADD]], [[FMUL_3]] -; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD1]], [[DELTA]] -; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J_2]], align 8 +; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_ADD1]], ptr [[ARRAYIDX_J_2]], align 8 ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND]] ; LICM_AFTER_REASSOCIATE: for.end: ; LICM_AFTER_REASSOCIATE-NEXT: ret void ; +; LICM_AFTER_REASSOCIATE_CONSTRAINED-LABEL: define void @innermost_loop_3d_fast +; LICM_AFTER_REASSOCIATE_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: entry: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br label [[FOR_COND:%.*]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.cond: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.body: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ADD_J_1]] = add nuw nsw i32 [[J]], 1 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ADD_J_2:%.*]] = add nuw nsw i32 [[J]], 2 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[IDXPROM_J_2:%.*]] = zext i32 [[ADD_J_2]] to i64 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ARRAYIDX_J_2:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_2]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CELL_3:%.*]] = load double, ptr [[ARRAYIDX_J_2]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_3:%.*]] = fmul fast double [[CELL_3]], [[D3]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[REASS_ADD1:%.*]] = fadd fast double [[REASS_ADD]], [[FMUL_3]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD1]], [[DELTA]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J_2]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br label [[FOR_COND]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.end: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: ret void +; entry: %fmul.d1 = fmul fast double %d1, %delta %fmul.d2 = fmul fast double %d2, %delta @@ -547,6 +755,32 @@ ; LICM_ONLY: for.end: ; LICM_ONLY-NEXT: ret void ; +; LICM_ONLY_CONSTRAINED-LABEL: define void @innermost_loop_2d_nofast +; LICM_ONLY_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { +; LICM_ONLY_CONSTRAINED-NEXT: entry: +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_D1:%.*]] = fmul double [[D1]], [[DELTA]] +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_D2:%.*]] = fmul double [[D2]], [[DELTA]] +; LICM_ONLY_CONSTRAINED-NEXT: br label [[FOR_COND:%.*]] +; LICM_ONLY_CONSTRAINED: for.cond: +; LICM_ONLY_CONSTRAINED-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] +; LICM_ONLY_CONSTRAINED-NEXT: [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]] +; LICM_ONLY_CONSTRAINED-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; LICM_ONLY_CONSTRAINED: for.body: +; LICM_ONLY_CONSTRAINED-NEXT: [[ADD_J_1]] = add nuw nsw i32 [[J]], 1 +; LICM_ONLY_CONSTRAINED-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 +; LICM_ONLY_CONSTRAINED-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_1:%.*]] = fmul double [[FMUL_D1]], [[CELL_1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 +; LICM_ONLY_CONSTRAINED-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] +; LICM_ONLY_CONSTRAINED-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_2:%.*]] = fmul double [[FMUL_D2]], [[CELL_2]] +; LICM_ONLY_CONSTRAINED-NEXT: [[FADD_1:%.*]] = fadd double [[FMUL_1]], [[FMUL_2]] +; LICM_ONLY_CONSTRAINED-NEXT: store double [[FADD_1]], ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: br label [[FOR_COND]] +; LICM_ONLY_CONSTRAINED: for.end: +; LICM_ONLY_CONSTRAINED-NEXT: ret void +; ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_2d_nofast ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_AFTER_REASSOCIATE-NEXT: entry: @@ -573,6 +807,32 @@ ; LICM_AFTER_REASSOCIATE: for.end: ; LICM_AFTER_REASSOCIATE-NEXT: ret void ; +; LICM_AFTER_REASSOCIATE_CONSTRAINED-LABEL: define void @innermost_loop_2d_nofast +; LICM_AFTER_REASSOCIATE_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: entry: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_D1:%.*]] = fmul double [[D1]], [[DELTA]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_D2:%.*]] = fmul double [[D2]], [[DELTA]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br label [[FOR_COND:%.*]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.cond: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.body: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ADD_J_1]] = add nuw nsw i32 [[J]], 1 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_1:%.*]] = fmul double [[FMUL_D1]], [[CELL_1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_2:%.*]] = fmul double [[FMUL_D2]], [[CELL_2]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FADD_1:%.*]] = fadd double [[FMUL_1]], [[FMUL_2]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: store double [[FADD_1]], ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br label [[FOR_COND]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.end: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: ret void +; entry: %fmul.d1 = fmul double %d1, %delta %fmul.d2 = fmul double %d2, %delta @@ -648,6 +908,8 @@ ; LICM_ONLY-LABEL: define void @innermost_loop_2d_fast_reassociated ; LICM_ONLY-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_ONLY-NEXT: entry: +; LICM_ONLY-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D1]], [[DELTA]] +; LICM_ONLY-NEXT: [[FACTOR_OP_FMUL1:%.*]] = fmul fast double [[D2]], [[DELTA]] ; LICM_ONLY-NEXT: br label [[FOR_COND:%.*]] ; LICM_ONLY: for.cond: ; LICM_ONLY-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] @@ -658,21 +920,47 @@ ; LICM_ONLY-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 ; LICM_ONLY-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] ; LICM_ONLY-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 -; LICM_ONLY-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_ONLY-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL]] ; LICM_ONLY-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 ; LICM_ONLY-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] ; LICM_ONLY-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 -; LICM_ONLY-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_ONLY-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL1]] ; LICM_ONLY-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] -; LICM_ONLY-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]] -; LICM_ONLY-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY-NEXT: store double [[REASS_ADD]], ptr [[ARRAYIDX_J]], align 8 ; LICM_ONLY-NEXT: br label [[FOR_COND]] ; LICM_ONLY: for.end: ; LICM_ONLY-NEXT: ret void ; +; LICM_ONLY_CONSTRAINED-LABEL: define void @innermost_loop_2d_fast_reassociated +; LICM_ONLY_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { +; LICM_ONLY_CONSTRAINED-NEXT: entry: +; LICM_ONLY_CONSTRAINED-NEXT: br label [[FOR_COND:%.*]] +; LICM_ONLY_CONSTRAINED: for.cond: +; LICM_ONLY_CONSTRAINED-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] +; LICM_ONLY_CONSTRAINED-NEXT: [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]] +; LICM_ONLY_CONSTRAINED-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; LICM_ONLY_CONSTRAINED: for.body: +; LICM_ONLY_CONSTRAINED-NEXT: [[ADD_J_1]] = add nuw nsw i32 [[J]], 1 +; LICM_ONLY_CONSTRAINED-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 +; LICM_ONLY_CONSTRAINED-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 +; LICM_ONLY_CONSTRAINED-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] +; LICM_ONLY_CONSTRAINED-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_ONLY_CONSTRAINED-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]] +; LICM_ONLY_CONSTRAINED-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: br label [[FOR_COND]] +; LICM_ONLY_CONSTRAINED: for.end: +; LICM_ONLY_CONSTRAINED-NEXT: ret void +; ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_2d_fast_reassociated ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_AFTER_REASSOCIATE-NEXT: entry: +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL:%.*]] = fmul fast double [[D1]], [[DELTA]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FACTOR_OP_FMUL1:%.*]] = fmul fast double [[D2]], [[DELTA]] ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND:%.*]] ; LICM_AFTER_REASSOCIATE: for.cond: ; LICM_AFTER_REASSOCIATE-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] @@ -683,18 +971,42 @@ ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[FACTOR_OP_FMUL]] ; LICM_AFTER_REASSOCIATE-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 ; LICM_AFTER_REASSOCIATE-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] ; LICM_AFTER_REASSOCIATE-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 -; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_AFTER_REASSOCIATE-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[FACTOR_OP_FMUL1]] ; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] -; LICM_AFTER_REASSOCIATE-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]] -; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE-NEXT: store double [[REASS_ADD]], ptr [[ARRAYIDX_J]], align 8 ; LICM_AFTER_REASSOCIATE-NEXT: br label [[FOR_COND]] ; LICM_AFTER_REASSOCIATE: for.end: ; LICM_AFTER_REASSOCIATE-NEXT: ret void ; +; LICM_AFTER_REASSOCIATE_CONSTRAINED-LABEL: define void @innermost_loop_2d_fast_reassociated +; LICM_AFTER_REASSOCIATE_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: entry: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br label [[FOR_COND:%.*]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.cond: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.body: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ADD_J_1]] = add nuw nsw i32 [[J]], 1 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_2]], [[D2]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[REASS_ADD]], [[DELTA]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br label [[FOR_COND]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.end: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: ret void +; entry: br label %for.cond @@ -777,6 +1089,31 @@ ; LICM_ONLY: for.end: ; LICM_ONLY-NEXT: ret void ; +; LICM_ONLY_CONSTRAINED-LABEL: define void @innermost_loop_2d_nofast_reassociated +; LICM_ONLY_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { +; LICM_ONLY_CONSTRAINED-NEXT: entry: +; LICM_ONLY_CONSTRAINED-NEXT: br label [[FOR_COND:%.*]] +; LICM_ONLY_CONSTRAINED: for.cond: +; LICM_ONLY_CONSTRAINED-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] +; LICM_ONLY_CONSTRAINED-NEXT: [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]] +; LICM_ONLY_CONSTRAINED-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; LICM_ONLY_CONSTRAINED: for.body: +; LICM_ONLY_CONSTRAINED-NEXT: [[ADD_J_1]] = add nuw nsw i32 [[J]], 1 +; LICM_ONLY_CONSTRAINED-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 +; LICM_ONLY_CONSTRAINED-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_1:%.*]] = fmul double [[CELL_1]], [[D1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 +; LICM_ONLY_CONSTRAINED-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] +; LICM_ONLY_CONSTRAINED-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_2:%.*]] = fmul double [[CELL_2]], [[D2]] +; LICM_ONLY_CONSTRAINED-NEXT: [[REASS_ADD:%.*]] = fadd double [[FMUL_2]], [[FMUL_1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[REASS_MUL:%.*]] = fmul double [[REASS_ADD]], [[DELTA]] +; LICM_ONLY_CONSTRAINED-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: br label [[FOR_COND]] +; LICM_ONLY_CONSTRAINED: for.end: +; LICM_ONLY_CONSTRAINED-NEXT: ret void +; ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_2d_nofast_reassociated ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_AFTER_REASSOCIATE-NEXT: entry: @@ -802,6 +1139,31 @@ ; LICM_AFTER_REASSOCIATE: for.end: ; LICM_AFTER_REASSOCIATE-NEXT: ret void ; +; LICM_AFTER_REASSOCIATE_CONSTRAINED-LABEL: define void @innermost_loop_2d_nofast_reassociated +; LICM_AFTER_REASSOCIATE_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: entry: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br label [[FOR_COND:%.*]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.cond: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.body: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ADD_J_1]] = add nuw nsw i32 [[J]], 1 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_1:%.*]] = fmul double [[D1]], [[CELL_1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_2:%.*]] = fmul double [[D2]], [[CELL_2]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[REASS_ADD:%.*]] = fadd double [[FMUL_1]], [[FMUL_2]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[REASS_MUL:%.*]] = fmul double [[DELTA]], [[REASS_ADD]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br label [[FOR_COND]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.end: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: ret void +; entry: br label %for.cond @@ -905,6 +1267,37 @@ ; LICM_ONLY: for.end: ; LICM_ONLY-NEXT: ret void ; +; LICM_ONLY_CONSTRAINED-LABEL: define void @innermost_loop_3d_fast_reassociated_different +; LICM_ONLY_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { +; LICM_ONLY_CONSTRAINED-NEXT: entry: +; LICM_ONLY_CONSTRAINED-NEXT: br label [[FOR_COND:%.*]] +; LICM_ONLY_CONSTRAINED: for.cond: +; LICM_ONLY_CONSTRAINED-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] +; LICM_ONLY_CONSTRAINED-NEXT: [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]] +; LICM_ONLY_CONSTRAINED-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; LICM_ONLY_CONSTRAINED: for.body: +; LICM_ONLY_CONSTRAINED-NEXT: [[ADD_J_1]] = add nuw nsw i32 [[J]], 1 +; LICM_ONLY_CONSTRAINED-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 +; LICM_ONLY_CONSTRAINED-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: [[IDXPROM_J_2:%.*]] = zext i32 [[ADD_J_1]] to i64 +; LICM_ONLY_CONSTRAINED-NEXT: [[ARRAYIDX_J_2:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_2]] +; LICM_ONLY_CONSTRAINED-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J_2]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: [[CELL_3:%.*]] = load double, ptr [[ARRAYIDX_J_2]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 +; LICM_ONLY_CONSTRAINED-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] +; LICM_ONLY_CONSTRAINED-NEXT: [[CELL_4:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_4]], [[D2]] +; LICM_ONLY_CONSTRAINED-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[FMUL_2]], [[FMUL_1]] +; LICM_ONLY_CONSTRAINED-NEXT: [[EXTRA_MUL:%.*]] = fmul fast double [[CELL_3]], [[CELL_2]] +; LICM_ONLY_CONSTRAINED-NEXT: [[EXTRA_ADD:%.*]] = fadd fast double [[EXTRA_MUL]], [[REASS_ADD]] +; LICM_ONLY_CONSTRAINED-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[EXTRA_ADD]], [[DELTA]] +; LICM_ONLY_CONSTRAINED-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_ONLY_CONSTRAINED-NEXT: br label [[FOR_COND]] +; LICM_ONLY_CONSTRAINED: for.end: +; LICM_ONLY_CONSTRAINED-NEXT: ret void +; ; LICM_AFTER_REASSOCIATE-LABEL: define void @innermost_loop_3d_fast_reassociated_different ; LICM_AFTER_REASSOCIATE-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { ; LICM_AFTER_REASSOCIATE-NEXT: entry: @@ -936,6 +1329,37 @@ ; LICM_AFTER_REASSOCIATE: for.end: ; LICM_AFTER_REASSOCIATE-NEXT: ret void ; +; LICM_AFTER_REASSOCIATE_CONSTRAINED-LABEL: define void @innermost_loop_3d_fast_reassociated_different +; LICM_AFTER_REASSOCIATE_CONSTRAINED-SAME: (i32 [[I:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[DELTA:%.*]], ptr [[CELLS:%.*]]) { +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: entry: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br label [[FOR_COND:%.*]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.cond: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CMP_NOT:%.*]] = icmp sgt i32 [[J]], [[I]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.body: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ADD_J_1]] = add nuw nsw i32 [[J]], 1 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[IDXPROM_J_1:%.*]] = zext i32 [[ADD_J_1]] to i64 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ARRAYIDX_J_1:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CELL_1:%.*]] = load double, ptr [[ARRAYIDX_J_1]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[IDXPROM_J_2:%.*]] = zext i32 [[ADD_J_1]] to i64 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ARRAYIDX_J_2:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J_2]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CELL_2:%.*]] = load double, ptr [[ARRAYIDX_J_2]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CELL_3:%.*]] = load double, ptr [[ARRAYIDX_J_2]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds double, ptr [[CELLS]], i64 [[IDXPROM_J]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[CELL_4:%.*]] = load double, ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_1:%.*]] = fmul fast double [[CELL_1]], [[D1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[FMUL_2:%.*]] = fmul fast double [[CELL_4]], [[D2]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[EXTRA_MUL:%.*]] = fmul fast double [[CELL_3]], [[CELL_2]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[REASS_ADD:%.*]] = fadd fast double [[EXTRA_MUL]], [[FMUL_1]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[EXTRA_ADD:%.*]] = fadd fast double [[REASS_ADD]], [[FMUL_2]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: [[REASS_MUL:%.*]] = fmul fast double [[EXTRA_ADD]], [[DELTA]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: store double [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8 +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: br label [[FOR_COND]] +; LICM_AFTER_REASSOCIATE_CONSTRAINED: for.end: +; LICM_AFTER_REASSOCIATE_CONSTRAINED-NEXT: ret void +; entry: br label %for.cond