Index: llvm/include/llvm/Analysis/ScalarEvolution.h =================================================================== --- llvm/include/llvm/Analysis/ScalarEvolution.h +++ llvm/include/llvm/Analysis/ScalarEvolution.h @@ -961,6 +961,17 @@ const SCEV *&InvariantLHS, const SCEV *&InvariantRHS); + /// Return true if the result of the predicate LHS `Pred` RHS is loop + /// invariant with respect to L at given Context during at least first + /// MaxIter iterations. Set InvariantPred, InvariantLHS and InvariantLHS so + /// that InvariantLHS `InvariantPred` InvariantRHS is the loop invariant form + /// of LHS `Pred` RHS. The predicate should be the loop's exit condition. + bool isLoopInvariantExitCondDuringFirstIterations( + ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, + const Instruction *Context, const SCEV *MaxIter, + ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS, + const SCEV *&InvariantRHS); + /// Simplify LHS and RHS in a comparison with predicate Pred. Return true /// iff any changes were made. If the operands are provably equal or /// unequal, LHS and RHS are set to the same value and Pred is set to either Index: llvm/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/lib/Analysis/ScalarEvolution.cpp +++ llvm/lib/Analysis/ScalarEvolution.cpp @@ -9279,6 +9279,75 @@ return true; } +bool ScalarEvolution::isLoopInvariantExitCondDuringFirstIterations( + ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, + const Instruction *Context, const SCEV *MaxIter, + ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS, + const SCEV *&InvariantRHS) { + // Try to prove the following set of facts: + // - The predicate is monotonic. + // - If the check does not fail on the 1st iteration: + // - No overflow will happen during first MaxIter iterations; + // - It will not fail on the MaxIter'th iteration. + // If the check does fail on the 1st iteration, we leave the loop and no + // other checks matter. + + // If there is a loop-invariant, force it into the RHS, otherwise bail out. + if (!isLoopInvariant(RHS, L)) { + if (!isLoopInvariant(LHS, L)) + return false; + + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + auto *AR = dyn_cast(LHS); + // TODO: Lift affinity limitation in the future. + if (!AR || AR->getLoop() != L || !AR->isAffine()) + return false; + + // The predicate must be relational (i.e. <, <=, >=, >). + if (!ICmpInst::isRelational(Pred)) + return false; + + // TODO: Support steps other than +/- 1. + const SCEV *Step = AR->getOperand(1); + auto *One = getOne(Step->getType()); + auto *MinusOne = getNegativeSCEV(One); + if (Step != One && Step != MinusOne) + return false; + + // Type mismatch here means that MaxIter is potentially larger than max + // unsigned value in start type, which mean we cannot prove no wrap for the + // indvar. + if (AR->getType() != MaxIter->getType()) + return false; + + // Value of IV on suggested last iteration. + const SCEV *Last = AR->evaluateAtIteration(MaxIter, *this); + // Does it still meet the requirement? + if (!isKnownPredicateAt(Pred, Last, RHS, Context)) + return false; + // Because step is +/- 1 and MaxIter has same type as Start (i.e. it does + // not exceed max unsigned value of this type), this effectively proves + // that there is no wrap during the iteration. To prove that there is no + // signed/unsigned wrap, we need to check that + // Start <= Last for step = 1 or Start >= Last for step = -1. + ICmpInst::Predicate NoOverflowPred = + CmpInst::isSigned(Pred) ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; + if (Step == MinusOne) + NoOverflowPred = CmpInst::getSwappedPredicate(NoOverflowPred); + const SCEV *Start = AR->getStart(); + if (!isKnownPredicateAt(NoOverflowPred, Start, Last, Context)) + return false; + + // Everything is fine. + InvariantPred = Pred; + InvariantLHS = Start; + InvariantRHS = RHS; + return true; +} + bool ScalarEvolution::isKnownPredicateViaConstantRanges( ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { if (HasSameValue(LHS, RHS)) Index: llvm/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -2307,9 +2307,9 @@ } // Returns true if the condition of \p BI being checked is invariant and can be -// proved to be trivially true. +// proved to be trivially true during at least first \p MaxIter iterations. static bool isTrivialCond(const Loop *L, BranchInst *BI, ScalarEvolution *SE, - bool ProvingLoopExit) { + bool ProvingLoopExit, const SCEV *MaxIter) { ICmpInst::Predicate Pred; Value *LHS, *RHS; using namespace PatternMatch; @@ -2335,7 +2335,20 @@ if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) return true; - return false; + if (ProvingLoopExit) + return false; + + ICmpInst::Predicate InvariantPred; + const SCEV *InvariantLHS, *InvariantRHS; + + // Check if there is a loop-invariant predicate equivalent to our check. + if (!SE->isLoopInvariantExitCondDuringFirstIterations( + Pred, LHSS, RHSS, L, BI, MaxIter, InvariantPred, InvariantLHS, + InvariantRHS)) + return false; + + // Can we prove it to be trivially true? + return SE->isKnownPredicateAt(InvariantPred, InvariantLHS, InvariantRHS, BI); } bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { @@ -2413,17 +2426,20 @@ for (BasicBlock *ExitingBB : ExitingBlocks) { const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); if (isa(ExitCount)) { + auto *BI = cast(ExitingBB->getTerminator()); + auto OptimizeCond = [&](bool Inverted, const SCEV *MaxIter) { + if (isTrivialCond(L, BI, SE, Inverted, MaxIter)) { + FoldExit(ExitingBB, Inverted); + return true; + } + return false; + }; // Okay, we do not know the exit count here. Can we at least prove that it // will remain the same within iteration space? - auto *BI = cast(ExitingBB->getTerminator()); - if (isTrivialCond(L, BI, SE, false)) { - FoldExit(ExitingBB, false); + if (OptimizeCond(false, MaxExitCount)) Changed = true; - } - if (isTrivialCond(L, BI, SE, true)) { - FoldExit(ExitingBB, true); + else if (OptimizeCond(true, MaxExitCount)) Changed = true; - } continue; } Index: llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll +++ llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll @@ -12,8 +12,7 @@ ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], -1 -; CHECK-NEXT: [[RC:%.*]] = icmp slt i32 [[IV_NEXT]], [[LEN]] -; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE]], label [[FAIL:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 0 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] @@ -93,8 +92,7 @@ ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[RC:%.*]] = icmp sgt i32 [[IV_NEXT]], [[LEN]] -; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE]], label [[FAIL:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 0 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] @@ -172,8 +170,7 @@ ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: [[RC:%.*]] = icmp ugt i32 [[IV_NEXT]], [[LEN]] -; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE]], label [[FAIL:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 1000 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] @@ -211,8 +208,7 @@ ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], -1 -; CHECK-NEXT: [[RC:%.*]] = icmp slt i32 [[IV_NEXT]], [[LEN]] -; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE]], label [[FAIL:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 0 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] Index: llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll +++ llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll @@ -71,8 +71,7 @@ ; CHECK-NEXT: br i1 [[ZERO_COND]], label [[EXIT:%.*]], label [[RANGE_CHECK_BLOCK:%.*]] ; CHECK: range_check_block: ; CHECK-NEXT: [[IV_NEXT]] = sub i32 [[IV]], 1 -; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp slt i32 [[IV_NEXT]], [[LEN]] -; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[FAIL:%.*]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE]], label [[FAIL:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, i32* [[P]], i32 [[IV]] ; CHECK-NEXT: [[EL:%.*]] = load i32, i32* [[EL_PTR]], align 4