diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1091,7 +1091,8 @@ /// invariants, available at L's entry. Otherwise, return None. Optional getLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, - const SCEV *RHS, const Loop *L); + const SCEV *RHS, const Loop *L, + const Instruction *CtxI = nullptr); /// If the result of the predicate LHS `Pred` RHS is loop invariant with /// respect to L at given Context during at least first MaxIter iterations, diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -10756,8 +10756,8 @@ Optional ScalarEvolution::getLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - const Loop *L) { - + const Loop *L, + const Instruction *CtxI) { // If there is a loop-invariant, force it into the RHS, otherwise bail out. if (!isLoopInvariant(RHS, L)) { if (!isLoopInvariant(LHS, L)) @@ -10794,10 +10794,43 @@ bool Increasing = *MonotonicType == ScalarEvolution::MonotonicallyIncreasing; auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred); - if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS)) + if (isLoopBackedgeGuardedByCond(L, P, LHS, RHS)) + return ScalarEvolution::LoopInvariantPredicate(Pred, ArLHS->getStart(), + RHS); + + if (!CtxI) return None; + // Try to prove via context. + // TODO: Support other cases. + switch (Pred) { + default: + break; + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_ULT: { + assert(ArLHS->hasNoUnsignedWrap() && "Is a requirement of monotonicity!"); + // Given preconditions + // (1) ArLHS does not cross 0 (due to NoUnsignedWrap) + // (2) ArLHS =s 0 + // we can replace the loop variant ArLHS ArLHS Start(ArLHS) >=s 0. + // We can strengthen this to Start(ArLHS) getStart(), + RHS); + } + } - return ScalarEvolution::LoopInvariantPredicate(Pred, ArLHS->getStart(), RHS); + return None; } Optional diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -213,7 +213,8 @@ auto *PN = dyn_cast(IVOperand); if (!PN) return false; - auto LIP = SE->getLoopInvariantPredicate(Pred, S, X, L); + + auto LIP = SE->getLoopInvariantPredicate(Pred, S, X, L, ICmp); if (!LIP) return false; ICmpInst::Predicate InvariantPredicate = LIP->Pred; diff --git a/llvm/test/Transforms/IndVarSimplify/cycled_phis.ll b/llvm/test/Transforms/IndVarSimplify/cycled_phis.ll --- a/llvm/test/Transforms/IndVarSimplify/cycled_phis.ll +++ b/llvm/test/Transforms/IndVarSimplify/cycled_phis.ll @@ -320,6 +320,8 @@ ; Slightly more complex version of previous one (cycled phis). ; TODO: remove unsigned comparison by proving non-negativity of iv.start. +; TODO: When we check against IV_START, for some reason we then cannot infer nuw for IV.next. +; It was possible while checking against IV. Missing inference logic somewhere. define i32 @start.from.sibling.iv.wide.cycled.phis(i32* %len.ptr, i32* %sibling.len.ptr) { ; CHECK-LABEL: @start.from.sibling.iv.wide.cycled.phis( ; CHECK-NEXT: entry: @@ -349,10 +351,10 @@ ; CHECK-NEXT: [[SIGNED_CMP:%.*]] = icmp slt i32 [[IV]], [[LEN]] ; CHECK-NEXT: br i1 [[SIGNED_CMP]], label [[SIGNED_PASSED:%.*]], label [[FAILED_SIGNED:%.*]] ; CHECK: signed.passed: -; CHECK-NEXT: [[UNSIGNED_CMP:%.*]] = icmp ult i32 [[IV]], [[LEN]] +; CHECK-NEXT: [[UNSIGNED_CMP:%.*]] = icmp ult i32 [[IV_START]], [[LEN]] ; CHECK-NEXT: br i1 [[UNSIGNED_CMP]], label [[BACKEDGE]], label [[FAILED_UNSIGNED:%.*]] ; CHECK: backedge: -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; CHECK-NEXT: [[COND:%.*]] = call i1 @cond() ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[OUTER_LOOP_BACKEDGE]] ; CHECK: outer.loop.backedge: @@ -467,10 +469,10 @@ ; CHECK-NEXT: [[SIGNED_CMP:%.*]] = icmp slt i32 [[IV]], [[LEN]] ; CHECK-NEXT: br i1 [[SIGNED_CMP]], label [[SIGNED_PASSED:%.*]], label [[FAILED_SIGNED:%.*]] ; CHECK: signed.passed: -; CHECK-NEXT: [[UNSIGNED_CMP:%.*]] = icmp ult i32 [[IV]], [[LEN]] +; CHECK-NEXT: [[UNSIGNED_CMP:%.*]] = icmp ult i32 [[IV_START]], [[LEN]] ; CHECK-NEXT: br i1 [[UNSIGNED_CMP]], label [[BACKEDGE]], label [[FAILED_UNSIGNED:%.*]] ; CHECK: backedge: -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; CHECK-NEXT: [[COND:%.*]] = call i1 @cond() ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[OUTER_LOOP_SELECTION:%.*]] ; CHECK: outer.loop.selection: diff --git a/llvm/test/Transforms/IndVarSimplify/outer_phi.ll b/llvm/test/Transforms/IndVarSimplify/outer_phi.ll --- a/llvm/test/Transforms/IndVarSimplify/outer_phi.ll +++ b/llvm/test/Transforms/IndVarSimplify/outer_phi.ll @@ -412,7 +412,7 @@ ; CHECK-NEXT: [[SIGNED_COND:%.*]] = icmp slt i32 [[IV]], [[B]] ; CHECK-NEXT: br i1 [[SIGNED_COND]], label [[INNER_1:%.*]], label [[SIDE_EXIT:%.*]] ; CHECK: inner.1: -; CHECK-NEXT: [[UNSIGNED_COND:%.*]] = icmp ult i32 [[IV]], [[B]] +; CHECK-NEXT: [[UNSIGNED_COND:%.*]] = icmp ult i32 [[OUTER_IV]], [[B]] ; CHECK-NEXT: br i1 [[UNSIGNED_COND]], label [[INNER_BACKEDGE]], label [[SIDE_EXIT]] ; CHECK: inner.backedge: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 @@ -974,7 +974,7 @@ ; CHECK-NEXT: [[SIGNED_COND:%.*]] = icmp slt i32 [[IV]], [[B]] ; CHECK-NEXT: br i1 [[SIGNED_COND]], label [[INNER_1:%.*]], label [[SIDE_EXIT:%.*]] ; CHECK: inner.1: -; CHECK-NEXT: [[UNSIGNED_COND:%.*]] = icmp ult i32 [[IV]], [[B]] +; CHECK-NEXT: [[UNSIGNED_COND:%.*]] = icmp ult i32 [[OUTER_IV]], [[B]] ; CHECK-NEXT: br i1 [[UNSIGNED_COND]], label [[INNER_BACKEDGE]], label [[SIDE_EXIT]] ; CHECK: inner.backedge: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 @@ -1045,7 +1045,7 @@ ; CHECK-NEXT: [[SIGNED_COND:%.*]] = icmp slt i32 [[IV]], [[B]] ; CHECK-NEXT: br i1 [[SIGNED_COND]], label [[INNER_1:%.*]], label [[SIDE_EXIT:%.*]] ; CHECK: inner.1: -; CHECK-NEXT: [[UNSIGNED_COND:%.*]] = icmp ult i32 [[IV]], [[B]] +; CHECK-NEXT: [[UNSIGNED_COND:%.*]] = icmp ult i32 [[OUTER_IV]], [[B]] ; CHECK-NEXT: br i1 [[UNSIGNED_COND]], label [[INNER_BACKEDGE]], label [[SIDE_EXIT]] ; CHECK: inner.backedge: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 @@ -1128,7 +1128,7 @@ ; CHECK-NEXT: [[SIGNED_COND:%.*]] = icmp slt i32 [[IV]], [[B]] ; CHECK-NEXT: br i1 [[SIGNED_COND]], label [[INNER_1:%.*]], label [[SIDE_EXIT:%.*]] ; CHECK: inner.1: -; CHECK-NEXT: [[UNSIGNED_COND:%.*]] = icmp ult i32 [[IV]], [[B]] +; CHECK-NEXT: [[UNSIGNED_COND:%.*]] = icmp ult i32 [[OUTER_IV]], [[B]] ; CHECK-NEXT: br i1 [[UNSIGNED_COND]], label [[INNER_BACKEDGE]], label [[SIDE_EXIT]] ; CHECK: inner.backedge: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1