diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -106,6 +106,8 @@ "Number of min/max expressions hoisted out of the loop"); STATISTIC(NumGEPsHoisted, "Number of geps reassociated and hoisted out of the loop"); +STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated " + "and hoisted out of the loop"); /// Memory promotion is enabled by default. static cl::opt @@ -2525,10 +2527,89 @@ return true; } +/// Try to turn things like "LV + C1 < C2" into "LV < C2 - C1". Here +/// C1 and C2 are loop invariants and LV is a loop-variant. +static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS, + Value *InvariantRHS, ICmpInst &ICmp, Loop &L, + ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, + AssumptionCache *AC, DominatorTree *DT) { + assert(ICmpInst::isSigned(Pred) && "Not supported yet!"); + assert(!L.isLoopInvariant(VariantLHS) && "Precondition."); + assert(L.isLoopInvariant(InvariantRHS) && "Precondition."); + + // Try to represent VariantLHS as sum of invariant and variant operands. + using namespace PatternMatch; + Value *VariantOp, *InvariantOp; + if (!match(VariantLHS, m_NSWAdd(m_Value(VariantOp), m_Value(InvariantOp)))) + return false; + + // LHS itself is a loop-variant, try to represent it in the form: + // "VariantOp + InvariantOp". If it is possible, then we can reassociate. + if (L.isLoopInvariant(VariantOp)) + std::swap(VariantOp, InvariantOp); + if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp)) + return false; + + // In order to turn "LV + C1 < C2" into "LV < C2 - C1", we need to be able to + // freely move values from left side of inequality to right side (just as in + // normal linear arithmetics). Overflows make things much more complicated, so + // we want to avoid this. + auto &DL = L.getHeader()->getModule()->getDataLayout(); + bool ProvedNoOverflowAfterReassociate = + computeOverflowForSignedSub(InvariantRHS, InvariantOp, DL, AC, &ICmp, + DT) == llvm::OverflowResult::NeverOverflows; + if (!ProvedNoOverflowAfterReassociate) + return false; + auto *Preheader = L.getLoopPreheader(); + assert(Preheader && "Loop is not in simplify form?"); + IRBuilder<> Builder(Preheader->getTerminator()); + Value *NewCmpOp = Builder.CreateSub(InvariantRHS, InvariantOp, "invariant.op", + /*HasNUW*/ false, /*HasNSW*/ true); + ICmp.setPredicate(Pred); + ICmp.setOperand(0, VariantOp); + ICmp.setOperand(1, NewCmpOp); + eraseInstruction(cast(*VariantLHS), SafetyInfo, MSSAU); + return true; +} + +/// Reassociate and hoist add/sub expressions. +static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, + MemorySSAUpdater &MSSAU, AssumptionCache *AC, + DominatorTree *DT) { + using namespace PatternMatch; + ICmpInst::Predicate Pred; + Value *LHS, *RHS; + if (!match(&I, m_ICmp(Pred, m_Value(LHS), m_Value(RHS)))) + return false; + + // TODO: Support unsigned predicates? + if (!ICmpInst::isSigned(Pred)) + return false; + + // Put variant operand to LHS position. + if (L.isLoopInvariant(LHS)) { + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + // We want to delete the initial operation after reassociation, so only do it + // if it has no other uses. + if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS) || !LHS->hasOneUse()) + return false; + + // TODO: We could go with smarter context, taking common dominator of all I's + // users instead of I itself. + if (hoistAdd(Pred, LHS, RHS, cast(I), L, SafetyInfo, MSSAU, AC, DT)) + return true; + + // TODO: Support Sub. + + return false; +} + static bool hoistArithmetics(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, - MemorySSAUpdater &MSSAU, - AssumptionCache *AC, DominatorTree *DT) { + MemorySSAUpdater &MSSAU, AssumptionCache *AC, + DominatorTree *DT) { // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them // into (x < min(INV1, INV2)), and hoisting the invariant part of this // expression out of the loop. @@ -2545,6 +2626,13 @@ return true; } + // Try to hoist add/sub's by reassociation. + if (hoistAddSub(I, L, SafetyInfo, MSSAU, AC, DT)) { + ++NumHoisted; + ++NumAddSubHoisted; + return true; + } + return false; } diff --git a/llvm/test/Transforms/LICM/hoist-add-sub.ll b/llvm/test/Transforms/LICM/hoist-add-sub.ll --- a/llvm/test/Transforms/LICM/hoist-add-sub.ll +++ b/llvm/test/Transforms/LICM/hoist-add-sub.ll @@ -165,18 +165,18 @@ } -; TODO: x + iv < 4 ==> iv < 4 - x +; x + iv < 4 ==> iv < 4 - x define i32 @test_02(ptr %p, ptr %x_p, ptr %length_p) { ; CHECK-LABEL: define i32 @test_02 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0]] ; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 4, [[X]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[ARITH:%.*]] = add nsw i32 [[X]], [[IV]] -; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4 +; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]] ; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] ; CHECK: backedge: ; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] @@ -391,18 +391,18 @@ ret i32 -2 } -; TODO: iv + x < 4 ==> iv < 4 - x +; iv + x < 4 ==> iv < 4 - x define i32 @test_04(ptr %p, ptr %x_p, ptr %length_p) { ; CHECK-LABEL: define i32 @test_04 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0]] ; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 4, [[X]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[ARITH:%.*]] = add nsw i32 [[IV]], [[X]] -; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4 +; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]] ; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] ; CHECK: backedge: ; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]