Index: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
+++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
@@ -561,6 +561,15 @@
     /// pointer.
     bool checkValidity(const SCEV *S) const;
 
+    // Return true if `ExtendOpTy`({`Start`,+,`Step`}) can be proved to be equal
+    // to {`ExtendOpTy`(`Start`),+,`ExtendOpTy`(`Step`)}. This is equivalent to
+    // proving no signed (resp. unsigned) wrap in {`Start`,+,`Step`} if
+    // `ExtendOpTy` is `SCEVSignExtendExpr` (resp. `SCEVZeroExtendExpr`).
+    //
+    template <typename ExtendOpTy>
+    bool proveNoWrapByVaryingStart(const SCEV *Start, const SCEV *Step,
+                                   const Loop *L);
+
   public:
     static char ID; // Pass identification, replacement for typeid
     ScalarEvolution();
Index: llvm/trunk/lib/Analysis/ScalarEvolution.cpp
===================================================================
--- llvm/trunk/lib/Analysis/ScalarEvolution.cpp
+++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp
@@ -1325,6 +1325,85 @@
                         (SE->*GetExtendExpr)(PreStart, Ty));
 }
 
+// Try to prove away overflow by looking at "nearby" add recurrences. A
+// motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
+// does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
+//
+// Formally:
+//
+//     {S,+,X} == {S-T,+,X} + T
+//  => Ext({S,+,X}) == Ext({S-T,+,X} + T)
+//
+// If ({S-T,+,X} + T) does not overflow ... (1)
+//
+//  RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T)
+//
+// If {S-T,+,X} does not overflow ... (2)
+//
+//  RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T)
+//      == {Ext(S-T)+Ext(T),+,Ext(X)}
+//
+// If (S-T)+T does not overflow ... (3)
+//
+//  RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)}
+//      == {Ext(S),+,Ext(X)} == LHS
+//
+// Thus, if (1), (2) and (3) are true for some T, then
+//   Ext({S,+,X}) == {Ext(S),+,Ext(X)}
+//
+// (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T)
+// does not overflow" restricted to the 0th iteration. Therefore we only need
+// to check for (1) and (2).
+//
+// In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T
+// is `Delta` (defined below).
+//
+template <typename ExtendOpTy>
+bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
+                                                const SCEV *Step,
+                                                const Loop *L) {
+  auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
+
+  // We restrict `Start` to a constant to prevent SCEV from spending too much
+  // time here. It is correct (but more expensive) to continue with a
+  // non-constant `Start` and do a general SCEV subtraction to compute
+  // `PreStart` below.
+  //
+  const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
+  if (!StartC)
+    return false;
+
+  APInt StartAI = StartC->getValue()->getValue();
+
+  for (unsigned Delta : {-2, -1, 1, 2}) {
+    const SCEV *PreStart = getConstant(StartAI - Delta);
+
+    // Give up if we don't already have the add recurrence we need because
+    // actually constructing an add recurrence is relatively expensive.
+    const SCEVAddRecExpr *PreAR = [&]() {
+      FoldingSetNodeID ID;
+      ID.AddInteger(scAddRecExpr);
+      ID.AddPointer(PreStart);
+      ID.AddPointer(Step);
+      ID.AddPointer(L);
+      void *IP = nullptr;
+      return static_cast<SCEVAddRecExpr *>(
+          UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+    }();
+
+    if (PreAR && PreAR->getNoWrapFlags(WrapType)) {  // proves (2)
+      const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
+      ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+      const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
+          DeltaS, &Pred, this);
+      if (Limit && isKnownPredicate(Pred, PreAR, Limit))  // proves (1)
+        return true;
+    }
+  }
+
+  return false;
+}
+
 const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
                                                Type *Ty) {
   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
@@ -1473,6 +1552,13 @@
           }
         }
       }
+
+      if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
+        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+        return getAddRecExpr(
+            getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+            getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+      }
     }
 
   // The cast wasn't folded; create an explicit cast node.
@@ -1664,6 +1750,13 @@
           return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
         }
       }
+
+      if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
+        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+        return getAddRecExpr(
+            getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+            getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+      }
     }
 
   // The cast wasn't folded; create an explicit cast node.
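
The proof sketch in the comment block above can be sanity-checked numerically. The following standalone C++ program is not part of the patch and uses no LLVM APIs; the 8-bit width and the concrete recurrences {0,+,4} and {1,+,4} are simply the motivating example from the comment. It simulates the zero-extend case: while the nearby recurrence {0,+,4} is `nuw` and `ult -1`, zero-extending {1,+,4} iteration by iteration agrees with the widened recurrence {zext(1),+,zext(4)}.

// Standalone illustration of the rule above -- plain C++, not LLVM code.
// Premises, specialized to Ext = zext, S = 1, X = 4, T = 1 (so S-T = 0):
//   (1) {0,+,4} + 1 does not overflow, i.e. {0,+,4} is ult -1 (0xFF), and
//   (2) {0,+,4} is nuw.
// Conclusion: zext({1,+,4}) == {zext(1),+,zext(4)}, i.e. {1,+,4} is nuw.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint8_t Step = 4;
  uint8_t Pre = 0;    // the "nearby" recurrence {0,+,4} in i8
  uint8_t Cur = 1;    // the recurrence of interest {1,+,4} in i8
  uint16_t Wide = 1;  // {zext(1),+,zext(4)} computed in i16

  for (;;) {
    // The conclusion holds on every iteration for which the premises held
    // on all earlier iterations.
    assert(uint16_t{Cur} == Wide && "zext({1,+,4}) != {zext(1),+,zext(4)}");

    if (Pre >= 0xFF)                 // premise (1) fails: {0,+,4} + 1 wraps
      break;
    if (Pre > uint8_t(0xFF - Step))  // premise (2) fails: the next step of
      break;                         // {0,+,4} wraps, so it is not nuw

    Pre += Step;
    Cur += Step;
    Wide += Step;
  }
  std::puts("checked: {1,+,4} behaves as nuw while {0,+,4} is nuw and ult -1");
  return 0;
}

The loop stops as soon as either premise would fail on the next iteration; up to that point the conclusion Ext({S,+,X}) == {Ext(S),+,Ext(X)} holds exactly, matching the derivation in the patch comment.
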
Index: llvm/trunk/test/Analysis/ScalarEvolution/nowrap-preinc-limits.ll
===================================================================
--- llvm/trunk/test/Analysis/ScalarEvolution/nowrap-preinc-limits.ll
+++ llvm/trunk/test/Analysis/ScalarEvolution/nowrap-preinc-limits.ll
@@ -0,0 +1,44 @@
+; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
+
+define void @f(i1* %condition) {
+; CHECK-LABEL: Classifying expressions for: @f
+ entry:
+  br label %loop
+
+ loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
+  %idx.inc = add nsw i32 %idx, 1
+
+  %idx.inc2 = add i32 %idx.inc, 1
+  %idx.inc2.zext = zext i32 %idx.inc2 to i64
+
+; CHECK: %idx.inc2.zext = zext i32 %idx.inc2 to i64
+; CHECK-NEXT:  -->  {2,+,1}<nuw><%loop>
+
+  %c = load volatile i1, i1* %condition
+  br i1 %c, label %loop, label %exit
+
+ exit:
+  ret void
+}
+
+define void @g(i1* %condition) {
+; CHECK-LABEL: Classifying expressions for: @g
+ entry:
+  br label %loop
+
+ loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
+  %idx.inc = add nsw i32 %idx, 3
+
+  %idx.inc2 = add i32 %idx.inc, -1
+  %idx.inc2.sext = sext i32 %idx.inc2 to i64
+; CHECK: %idx.inc2.sext = sext i32 %idx.inc2 to i64
+; CHECK-NEXT:  -->  {2,+,3}<nuw><nsw><%loop>
+
+  %c = load volatile i1, i1* %condition
+  br i1 %c, label %loop, label %exit
+
+ exit:
+  ret void
+}
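
For the sign-extend path, test @g above exercises Delta = -1: SCEV sees {2,+,3} as {3,+,3} + (-1), where {3,+,3} is already known `nsw` via the `add nsw` that produces %idx.inc. A companion sketch (again plain C++, not LLVM code; the 8-bit width is an illustrative assumption) checks the signed variant of the same argument:

// Standalone illustration of the sext case from @g -- plain C++, not LLVM
// code. Premises, specialized to Ext = sext, S = 2, X = 3, T = -1:
//   (1) {3,+,3} + (-1) does not overflow, i.e. {3,+,3} is sgt INT8_MIN, and
//   (2) {3,+,3} is nsw.
// Conclusion: sext({2,+,3}) == {sext(2),+,sext(3)}, i.e. {2,+,3} is nsw.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const int8_t Step = 3;
  int8_t Pre = 3;    // the "nearby" recurrence {3,+,3} in i8
  int8_t Cur = 2;    // the recurrence of interest {2,+,3} in i8
  int16_t Wide = 2;  // {sext(2),+,sext(3)} computed in i16

  for (;;) {
    assert(int16_t{Cur} == Wide && "sext({2,+,3}) != {sext(2),+,sext(3)}");

    if (Pre == INT8_MIN)                // premise (1) fails: Pre - 1 wraps
      break;
    if (Pre > int8_t(INT8_MAX - Step))  // premise (2) fails: the next step
      break;                            // of {3,+,3} wraps, so it is not nsw

    Pre = int8_t(Pre + Step);
    Cur = int8_t(Cur + Step);
    Wide = int16_t(Wide + Step);
  }
  std::puts("checked: {2,+,3} behaves as nsw while {3,+,3} is nsw");
  return 0;
}

Note that premise (1) reduces to `Pre != INT8_MIN` here: adding Delta = -1 can only wrap signed arithmetic at the signed minimum, which is why a single comparison of the nearby recurrence against a limit suffices in the patch's isKnownPredicate check.
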