Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -107,6 +107,8 @@ "Number of min/max expressions hoisted out of the loop"); STATISTIC(NumGEPsHoisted, "Number of geps reassociated and hoisted out of the loop"); +STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated " + "and hoisted out of the loop"); /// Memory promotion is enabled by default. static cl::opt @@ -173,7 +175,7 @@ static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU); /// Aggregates various functions for hoisting computations out of loop. -static bool hoistArithmetics(Instruction &I, Loop &L, +static bool hoistArithmetics(Instruction &I, Loop &L, ScalarEvolution &SE, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, AssumptionCache *AC, DominatorTree *DT); @@ -987,7 +989,7 @@ // Try to reassociate instructions so that part of computations can be // done out of loop. - if (hoistArithmetics(I, *CurLoop, *SafetyInfo, MSSAU, AC, DT)) { + if (hoistArithmetics(I, *CurLoop, *SE, *SafetyInfo, MSSAU, AC, DT)) { Changed = true; continue; } @@ -2541,10 +2543,72 @@ return true; } -static bool hoistArithmetics(Instruction &I, Loop &L, +/// Try to turn things like "LV + C1 < C2" into "LV < C2 - C1". Here +/// C1 and C2 are loop invariants and LV is a loop-variant. +static bool hoistAddSub(Instruction &I, Loop &L, ScalarEvolution &SE, + ICFLoopSafetyInfo &SafetyInfo, + MemorySSAUpdater &MSSAU) { + using namespace PatternMatch; + ICmpInst::Predicate Pred; + Value *LHS, *RHS; + if (!match(&I, m_ICmp(Pred, m_Value(LHS), m_Value(RHS)))) + return false; + + // TODO: Support unsigned predicates? + if (!ICmpInst::isSigned(Pred)) + return false; + + // Put variant operand to LHS position. + if (L.isLoopInvariant(LHS)) { + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + // We want to delete the initial operation after reassociation, so only do it + // if it has no other uses. + if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS) || !LHS->hasOneUse()) + return false; + Value *VariantOp, *InvariantOp; + + // TODO: support sub. + if (!match(LHS, m_Add(m_Value(VariantOp), m_Value(InvariantOp)))) + return false; + + // LHS itsels if a invariant, try to represent it in the form: + // "VariantOp + InvariantOp". If it it possible, then we can reassociate. + if (L.isLoopInvariant(VariantOp)) + std::swap(VariantOp, InvariantOp); + if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp)) + return false; + + // In order to turn "LV + C1 < C2" into "LV < C2 - C1", we need to be able to + // freely move values from left side of inequality to right side (just as in + // normal linear arithmetics). Overflows make things much more complicated, so + // we want to avoid this. No-overflow on operation is guaranteed by no-wrap + // flag on operation, and the no-overflow of the new RHS is checked below. + if (!cast(LHS)->hasNoSignedWrap()) + return false; + const SCEV *RHSS = SE.getSCEV(RHS); + const SCEV *InvariantOpS = SE.getNegativeSCEV(SE.getSCEV(InvariantOp)); + if (!SE.willNotOverflow(Instruction::BinaryOps::Add, /*Signed*/ true, RHSS, + InvariantOpS, /*CtxI*/ &I)) + return false; + auto *Preheader = L.getLoopPreheader(); + assert(Preheader && "Loop is not in simplify form?"); + IRBuilder<> Builder(Preheader->getTerminator()); + Value *NewCmpOp = Builder.CreateSub(RHS, InvariantOp, "invariant.op", + /*HasNUW*/ false, /*HasNSW*/ true); + auto *ICmp = cast(&I); + ICmp->setPredicate(Pred); + ICmp->setOperand(0, VariantOp); + ICmp->setOperand(1, NewCmpOp); + eraseInstruction(cast(*LHS), SafetyInfo, MSSAU); + return true; +} + +static bool hoistArithmetics(Instruction &I, Loop &L, ScalarEvolution &SE, ICFLoopSafetyInfo &SafetyInfo, - MemorySSAUpdater &MSSAU, - AssumptionCache *AC, DominatorTree *DT) { + MemorySSAUpdater &MSSAU, AssumptionCache *AC, + DominatorTree *DT) { // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them // into (x < min(INV1, INV2)), and hoisting the invariant part of this // expression out of the loop. @@ -2561,6 +2625,13 @@ return true; } + // Try to hoist add/sub's by reassociation. + if (hoistAddSub(I, L, SE, SafetyInfo, MSSAU)) { + ++NumHoisted; + ++NumAddSubHoisted; + return true; + } + return false; } Index: llvm/test/Transforms/LICM/hoist-add-sub.ll =================================================================== --- llvm/test/Transforms/LICM/hoist-add-sub.ll +++ llvm/test/Transforms/LICM/hoist-add-sub.ll @@ -165,18 +165,18 @@ } -; TODO: x + iv < 4 ==> iv < 4 - x +; x + iv < 4 ==> iv < 4 - x define i32 @test_02(ptr %p, ptr %x_p, ptr %length_p) { ; CHECK-LABEL: define i32 @test_02 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0]] ; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 4, [[X]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[ARITH:%.*]] = add nsw i32 [[X]], [[IV]] -; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4 +; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]] ; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] ; CHECK: backedge: ; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] @@ -391,18 +391,18 @@ ret i32 -2 } -; TODO: iv + x < 4 ==> iv < 4 - x +; iv + x < 4 ==> iv < 4 - x define i32 @test_04(ptr %p, ptr %x_p, ptr %length_p) { ; CHECK-LABEL: define i32 @test_04 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0]] ; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 4, [[X]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[ARITH:%.*]] = add nsw i32 [[IV]], [[X]] -; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4 +; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]] ; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] ; CHECK: backedge: ; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]