Index: include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- include/llvm/Analysis/ScalarEvolution.h
+++ include/llvm/Analysis/ScalarEvolution.h
@@ -561,6 +561,15 @@
     /// pointer.
     bool checkValidity(const SCEV *S) const;
 
+    // Return true if `ExtendOpTy`({`Start`,+,`Step`}) can be proved to be equal
+    // to {`ExtendOpTy`(`Start`),+,`ExtendOpTy`(`Step`)}. This is equivalent to
+    // proving no signed (resp. unsigned) wrap in {`Start`,+,`Step`} if
+    // `ExtendOpTy` is `SCEVSignExtendExpr` (resp. `SCEVZeroExtendExpr`).
+    //
+    template <typename ExtendOpTy>
+    bool proveNoWrapByVaryingStart(const SCEV *Start, const SCEV *Step,
+                                   const Loop *L);
+
   public:
     static char ID; // Pass identification, replacement for typeid
     ScalarEvolution();
Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -1185,6 +1185,7 @@
 struct ExtendOpTraitsBase {
   typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *,
                                                           Type *);
+  typedef APInt (APInt::*GetExtendAPIntTy)(unsigned width) const;
 };
 
 // Used to make code generic over signed and unsigned overflow.
@@ -1198,6 +1199,8 @@
   // static const SCEV *getOverflowLimitForStep(const SCEV *Step,
   //                                            ICmpInst::Predicate *Pred,
   //                                            ScalarEvolution *SE);
+  //
+  // static const ExtendOpTraitsBase::GetExtendAPIntTy GetExtendAPInt;
 };
 
 template <>
@@ -1205,6 +1208,7 @@
 struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
   static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW;
   static const GetExtendExprTy GetExtendExpr;
+  static const GetExtendAPIntTy GetExtendAPInt;
 
   static const SCEV *getOverflowLimitForStep(const SCEV *Step,
                                              ICmpInst::Predicate *Pred,
@@ -1216,11 +1220,15 @@
 
 const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
     SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;
 
+const ExtendOpTraitsBase::GetExtendAPIntTy ExtendOpTraits<
+    SCEVSignExtendExpr>::GetExtendAPInt = &APInt::sext;
+
 template <>
 struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
   static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW;
   static const GetExtendExprTy GetExtendExpr;
+  static const GetExtendAPIntTy GetExtendAPInt;
 
   static const SCEV *getOverflowLimitForStep(const SCEV *Step,
                                              ICmpInst::Predicate *Pred,
@@ -1231,6 +1239,9 @@
 
 const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
     SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
+
+const ExtendOpTraitsBase::GetExtendAPIntTy ExtendOpTraits<
+    SCEVZeroExtendExpr>::GetExtendAPInt = &APInt::zext;
 }
 
 // The recurrence AR has been shown to have no signed/unsigned wrap or something
@@ -1325,6 +1336,92 @@
                         (SE->*GetExtendExpr)(PreStart, Ty));
 }
 
+// Try to prove away overflow by looking at "nearby" add recurrences. A
+// motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
+// does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
+//
+// Formally:
+//
+//     {S,+,X} == {S-T,+,X} + T
+//  => Ext({S,+,X}) == Ext({S-T,+,X} + T)
+//
+// If ({S-T,+,X} + T) does not overflow ... (1)
+//
+//  RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T)
+//
+// If {S-T,+,X} does not overflow ... (2)
+//
+//  RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T)
+//      == {Ext(S-T)+Ext(T),+,Ext(X)}
+//
+// If (S-T)+T does not overflow ... (3)
+//
+//  RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)}
+//      == {Ext(S),+,Ext(X)} == LHS
+//
+// Thus, if (1), (2) and (3) are true for some T, then
+//   Ext({S,+,X}) == {Ext(S),+,Ext(X)}
+//
+// In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T
+// is `Delta` (defined below).
+//
+template <typename ExtendOpTy>
+bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
+                                                const SCEV *Step,
+                                                const Loop *L) {
+  auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
+
+  // We restrict `Start` to a constant to prevent SCEV from spending too much
+  // time here. It is correct (but more expensive) to continue with a
+  // non-constant `Start` and do a general SCEV subtraction to compute
+  // `PreStart` below.
+  const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
+  if (!StartC)
+    return false;
+
+  APInt StartAI = StartC->getValue()->getValue();
+  unsigned BitWidth = StartAI.getBitWidth();
+
+  auto Extend = [&](const APInt &AI) {
+    auto ExtendAPInt = ExtendOpTraits<ExtendOpTy>::GetExtendAPInt;
+    return (AI.*ExtendAPInt)(BitWidth * 2);
+  };
+
+  for (int64_t Delta : {-2, -1, 1, 2}) {
+    APInt DeltaAI(BitWidth, Delta, /*isSigned=*/true);
+    APInt PreStartAI = StartAI - DeltaAI;
+
+    if (Extend(PreStartAI + DeltaAI) != (Extend(PreStartAI) + Extend(DeltaAI)))
+      continue; // (3) does not hold for this Delta; try the next one.
+
+    const SCEV *PreStart = getConstant(PreStartAI);
+
+    // Give up if we don't already have the add recurrence we need because
+    // actually constructing an add recurrence is relatively expensive.
+    const SCEVAddRecExpr *PreAR = [&]() {
+      FoldingSetNodeID ID;
+      ID.AddInteger(scAddRecExpr);
+      ID.AddPointer(PreStart);
+      ID.AddPointer(Step);
+      ID.AddPointer(L);
+      void *IP = nullptr;
+      return static_cast<const SCEVAddRecExpr *>(
+          UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+    }();
+
+    if (PreAR && PreAR->getNoWrapFlags(WrapType)) { // proves (2)
+      const SCEV *DeltaS = getConstant(StartC->getType(), Delta, true);
+      ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+      const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
+          DeltaS, &Pred, this);
+      if (Limit && isKnownPredicate(Pred, PreAR, Limit)) // proves (1)
+        return true;
+    }
+  }
+
+  return false;
+}
+
 const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
                                                Type *Ty) {
   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
@@ -1473,6 +1570,13 @@
           }
         }
       }
+
+      if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
+        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+        return getAddRecExpr(
+            getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+            getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+      }
     }
 
   // The cast wasn't folded; create an explicit cast node.
@@ -1664,6 +1768,13 @@
           return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
         }
       }
+
+      if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
+        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+        return getAddRecExpr(
+            getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+            getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+      }
     }
 
   // The cast wasn't folded; create an explicit cast node.
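
Aside (not part of the patch): the check labeled (3) is plain APInt arithmetic, so it can be tried out in isolation. The following standalone sketch assumes only LLVM's APInt; `ZExtTwice`, `Proves3`, and the chosen constants are illustrative names for the zero-extension instantiation of the `Extend` lambda above, not code from the patch.

// Standalone illustration of check (3): zero-extension distributes over
// (S-T) + T exactly when the narrow addition does not wrap unsigned.
// Build against LLVM's Support library, e.g.:
//   clang++ demo.cpp $(llvm-config --cxxflags --ldflags --libs support)
#include "llvm/ADT/APInt.h"
#include <cstdio>

using llvm::APInt;

// Mirrors the `Extend` lambda from the patch, specialized to zext: widen the
// value to twice its original bit width.
static APInt ZExtTwice(const APInt &AI) {
  return AI.zext(AI.getBitWidth() * 2);
}

// True iff Ext(PreStart + Delta) == Ext(PreStart) + Ext(Delta), i.e. (3).
static bool Proves3(const APInt &PreStart, const APInt &Delta) {
  return ZExtTwice(PreStart + Delta) == ZExtTwice(PreStart) + ZExtTwice(Delta);
}

int main() {
  const unsigned BitWidth = 8;

  // Start = 2, Delta = 1 => PreStart = 1; 1 + 1 does not wrap in 8 bits,
  // so zext distributes and (3) holds.
  APInt Start(BitWidth, 2), Delta(BitWidth, 1, /*isSigned=*/true);
  printf("(3) for Start=2, Delta=1: %d\n", Proves3(Start - Delta, Delta));

  // Start = 0, Delta = 1 => PreStart = -1 (0xFF); 0xFF + 1 wraps to 0 in
  // 8 bits, so zext(0) != zext(0xFF) + zext(1) and (3) fails.
  APInt Start2(BitWidth, 0);
  printf("(3) for Start=0, Delta=1: %d\n", Proves3(Start2 - Delta, Delta));

  return 0;
}

In the first case the narrow sum stays in range, so the extension commutes with the addition; in the second it wraps, which is exactly the situation the `continue` in the patch skips over before trying the next Delta.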
Index: test/Analysis/ScalarEvolution/nowrap-preinc-limits.ll
===================================================================
--- /dev/null
+++ test/Analysis/ScalarEvolution/nowrap-preinc-limits.ll
@@ -0,0 +1,44 @@
+; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
+
+define void @f(i1* %condition) {
+; CHECK-LABEL: Classifying expressions for: @f
+ entry:
+  br label %loop
+
+ loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
+  %idx.inc = add nsw i32 %idx, 1
+
+  %idx.inc2 = add i32 %idx.inc, 1
+  %idx.inc2.zext = zext i32 %idx.inc2 to i64
+
+; CHECK: %idx.inc2.zext = zext i32 %idx.inc2 to i64
+; CHECK-NEXT: --> {2,+,1}<nuw><%loop>
+
+  %c = load volatile i1, i1* %condition
+  br i1 %c, label %loop, label %exit
+
+ exit:
+  ret void
+}
+
+define void @g(i1* %condition) {
+; CHECK-LABEL: Classifying expressions for: @g
+ entry:
+  br label %loop
+
+ loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
+  %idx.inc = add nsw i32 %idx, 3
+
+  %idx.inc2 = add i32 %idx.inc, -1
+  %idx.inc2.sext = sext i32 %idx.inc2 to i64
+; CHECK: %idx.inc2.sext = sext i32 %idx.inc2 to i64
+; CHECK-NEXT: --> {2,+,3}<nsw><%loop>
+
+  %c = load volatile i1, i1* %condition
+  br i1 %c, label %loop, label %exit
+
+ exit:
+  ret void
+}
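
Usage note (not part of the patch): once the patch is applied and LLVM rebuilt, the new test can be exercised on its own with the RUN line's command, e.g.

  opt -analyze -scalar-evolution test/Analysis/ScalarEvolution/nowrap-preinc-limits.ll \
    | FileCheck test/Analysis/ScalarEvolution/nowrap-preinc-limits.ll

or through the lit driver (path assumes a standard build tree): build/bin/llvm-lit test/Analysis/ScalarEvolution/nowrap-preinc-limits.ll. The zext check only matches the folded form {2,+,1}<nuw><%loop>; without the new proveNoWrapByVaryingStart rule, SCEV prints the unfolded (zext i32 {2,+,1}<%loop> to i64) and the test fails.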