Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -1148,6 +1148,193 @@
   return S;
 }
 
+// Get the limit of a recurrence such that incrementing by Step cannot cause
+// signed overflow as long as the value of the recurrence within the
+// loop does not exceed this limit before incrementing.
+static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step,
+                                                 ICmpInst::Predicate *Pred,
+                                                 ScalarEvolution *SE) {
+  unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
+  if (SE->isKnownPositive(Step)) {
+    *Pred = ICmpInst::ICMP_SLT;
+    return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
+                           SE->getSignedRange(Step).getSignedMax());
+  }
+  if (SE->isKnownNegative(Step)) {
+    *Pred = ICmpInst::ICMP_SGT;
+    return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
+                           SE->getSignedRange(Step).getSignedMin());
+  }
+  return nullptr;
+}
+
+// Get the limit of a recurrence such that incrementing by Step cannot cause
+// unsigned overflow as long as the value of the recurrence within the loop does
+// not exceed this limit before incrementing.
+static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
+                                                   ICmpInst::Predicate *Pred,
+                                                   ScalarEvolution *SE) {
+  unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
+  *Pred = ICmpInst::ICMP_ULT;
+
+  return SE->getConstant(APInt::getMinValue(BitWidth) -
+                         SE->getUnsignedRange(Step).getUnsignedMax());
+}
+
+namespace {
+
+// Used to make code generic over signed and unsigned overflow.
+template <typename ExtendOpTy> struct ExtendOpTraits {
+  // Members present:
+  //
+  // static const SCEV::NoWrapFlags WrapType;
+  //
+  // typedef
+  //     const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *);
+  // static const GetExtendExprTy GetExtendExpr;
+  //
+  // static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+  //                                            ICmpInst::Predicate *Pred,
+  //                                            ScalarEvolution *SE);
+};
+
+template <> struct ExtendOpTraits<SCEVSignExtendExpr> {
+  static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW;
+
+  typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *);
+  static const GetExtendExprTy GetExtendExpr;
+
+  static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+                                             ICmpInst::Predicate *Pred,
+                                             ScalarEvolution *SE) {
+    return getSignedOverflowLimitForStep(Step, Pred, SE);
+  }
+};
+
+const ExtendOpTraits<SCEVSignExtendExpr>::GetExtendExprTy ExtendOpTraits<
+    SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;
+
+template <> struct ExtendOpTraits<SCEVZeroExtendExpr> {
+  static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW;
+
+  typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *);
+  static const GetExtendExprTy GetExtendExpr;
+
+  static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+                                             ICmpInst::Predicate *Pred,
+                                             ScalarEvolution *SE) {
+    return getUnsignedOverflowLimitForStep(Step, Pred, SE);
+  }
+};
+
+const ExtendOpTraits<SCEVZeroExtendExpr>::GetExtendExprTy ExtendOpTraits<
+    SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
+}
+
+// The recurrence AR has been shown to have no signed/unsigned wrap or something
+// close to it. Typically, if we can prove NSW/NUW for AR, then we can just as
+// easily prove NSW/NUW for its preincrement or postincrement sibling. This
+// allows normalizing a sign/zero extended AddRec as such:
+// {sext/zext(Step + Start),+,Step} => {(Step + sext/zext(Start)),+,Step}
+// As a result, the expression "Step + sext/zext(PreIncAR)" is congruent with
+// "sext/zext(PostIncAR)".
+template <typename ExtendOpTy>
+static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
+                                        ScalarEvolution *SE) {
+  auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
+  auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
+
+  const Loop *L = AR->getLoop();
+  const SCEV *Start = AR->getStart();
+  const SCEV *Step = AR->getStepRecurrence(*SE);
+
+  // Check for a simple looking step prior to loop entry.
+  const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
+  if (!SA)
+    return nullptr;
+
+  // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
+  // subtraction is expensive. For this purpose, perform a quick and dirty
+  // difference, by checking for Step in the operand list.
+  SmallVector<const SCEV *, 4> DiffOps;
+  for (const SCEV *Op : SA->operands())
+    if (Op != Step)
+      DiffOps.push_back(Op);
+
+  if (DiffOps.size() == SA->getNumOperands())
+    return nullptr;
+
+  // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` +
+  // `Step`:
+
+  // 1. NSW/NUW flags on the step increment.
+  const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
+  const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
+      SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
+
+  // WARNING: FIXME: the optimization below assumes that a sign/zero-overflowing
+  // nsw/nuw operation is undefined behavior. This is strictly more aggressive
+  // than the interpretation of nsw in other parts of LLVM (for instance, they
+  // may unconditionally hoist nsw/nuw arithmetic through control flow). This
+  // logic needs to be revisited once we have a consistent semantics for poison
+  // values.
+  //
+  // "{S,+,X} is <nsw>/<nuw>" and "{S,+,X} is evaluated at least once" implies
+  // "S+X does not sign/unsign-overflow" (we'd have undefined behavior if it
+  // did). If `L->getExitingBlock() == L->getLoopLatch()` then `PreAR` (=
+  // {S,+,X}<nsw>/<nuw>) is evaluated every time `AR` (= {S+X,+,X}) is
+  // evaluated, and hence within `AR` we are safe to assume that "S+X" will not
+  // sign/unsign-overflow.
+  //
+
+  BasicBlock *ExitingBlock = L->getExitingBlock();
+  BasicBlock *LatchBlock = L->getLoopLatch();
+  if (PreAR && PreAR->getNoWrapFlags(WrapType) &&
+      ExitingBlock != nullptr && ExitingBlock == LatchBlock)
+    return PreStart;
+
+  // 2. Direct overflow check on the step operation's expression.
+  unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
+  Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
+  const SCEV *OperandExtendedStart =
+      SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy),
+                     (SE->*GetExtendExpr)(Step, WideTy));
+  if ((SE->*GetExtendExpr)(Start, WideTy) == OperandExtendedStart) {
+    if (PreAR && AR->getNoWrapFlags(WrapType)) {
+      // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
+      // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
+      // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact.
+      const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType);
+    }
+    return PreStart;
+  }
+
+  // 3. Loop precondition.
+  ICmpInst::Predicate Pred;
+  const SCEV *OverflowLimit =
+      ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);
+
+  if (OverflowLimit &&
+      SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
+    return PreStart;
+  }
+  return nullptr;
+}
+
+// Get the normalized zero or sign extended expression for this AddRec's Start.
+template <typename ExtendOpTy>
+static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
+                                        ScalarEvolution *SE) {
+  auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
+
+  const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE);
+  if (!PreStart)
+    return (SE->*GetExtendExpr)(AR->getStart(), Ty);
+
+  return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty),
+                        (SE->*GetExtendExpr)(PreStart, Ty));
+}
+
 const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty) {
   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
@@ -1201,9 +1388,9 @@
       // If we have special knowledge that this addrec won't overflow,
       // we don't need to do any further analysis.
       if (AR->getNoWrapFlags(SCEV::FlagNUW))
-        return getAddRecExpr(getZeroExtendExpr(Start, Ty),
-                             getZeroExtendExpr(Step, Ty),
-                             L, AR->getNoWrapFlags());
+        return getAddRecExpr(
+            getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+            getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
 
       // Check whether the backedge-taken count is SCEVCouldNotCompute.
       // Note that this serves two purposes: It filters out loops that are
@@ -1240,9 +1427,10 @@
           // Cache knowledge of AR NUW, which is propagated to this AddRec.
           const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
           // Return the expression with the addrec on the outside.
-          return getAddRecExpr(getZeroExtendExpr(Start, Ty),
-                               getZeroExtendExpr(Step, Ty),
-                               L, AR->getNoWrapFlags());
+          return getAddRecExpr(
+              getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+              getZeroExtendExpr(Step, Ty),
+              L, AR->getNoWrapFlags());
         }
         // Similar to above, only this time treat the step value as signed.
         // This covers loops that count down.
@@ -1255,9 +1443,10 @@
           // Negative step causes unsigned wrap, but it still can't self-wrap.
           const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
           // Return the expression with the addrec on the outside.
-          return getAddRecExpr(getZeroExtendExpr(Start, Ty),
-                               getSignExtendExpr(Step, Ty),
-                               L, AR->getNoWrapFlags());
+          return getAddRecExpr(
+              getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+              getSignExtendExpr(Step, Ty),
+              L, AR->getNoWrapFlags());
         }
       }
 
@@ -1275,9 +1464,9 @@
           // Cache knowledge of AR NUW, which is propagated to this AddRec.
           const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
           // Return the expression with the addrec on the outside.
-          return getAddRecExpr(getZeroExtendExpr(Start, Ty),
-                               getZeroExtendExpr(Step, Ty),
-                               L, AR->getNoWrapFlags());
+          return getAddRecExpr(
+              getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+              getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
         }
       } else if (isKnownNegative(Step)) {
         const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
@@ -1290,9 +1479,9 @@
           // Negative step causes unsigned wrap, but it still can't self-wrap.
           const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
           // Return the expression with the addrec on the outside.
-          return getAddRecExpr(getZeroExtendExpr(Start, Ty),
-                               getSignExtendExpr(Step, Ty),
-                               L, AR->getNoWrapFlags());
+          return getAddRecExpr(
+              getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+              getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
         }
       }
     }
@@ -1307,121 +1496,6 @@
   return S;
 }
 
-// Get the limit of a recurrence such that incrementing by Step cannot cause
-// signed overflow as long as the value of the recurrence within the loop does
-// not exceed this limit before incrementing.
-static const SCEV *getOverflowLimitForStep(const SCEV *Step,
-                                           ICmpInst::Predicate *Pred,
-                                           ScalarEvolution *SE) {
-  unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
-  if (SE->isKnownPositive(Step)) {
-    *Pred = ICmpInst::ICMP_SLT;
-    return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
-                           SE->getSignedRange(Step).getSignedMax());
-  }
-  if (SE->isKnownNegative(Step)) {
-    *Pred = ICmpInst::ICMP_SGT;
-    return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
-                           SE->getSignedRange(Step).getSignedMin());
-  }
-  return nullptr;
-}
-
-// The recurrence AR has been shown to have no signed wrap or something close to
-// it. Typically, if we can prove NSW for AR, then we can just as easily prove
-// NSW for its preincrement or postincrement sibling. This allows normalizing a
-// sign extended AddRec as such: {sext(Step + Start),+,Step} => {(Step +
-// sext(Start),+,Step} As a result, the expression "Step + sext(PreIncAR)" is
-// congruent with "sext(PostIncAR)"
-static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
-                                            Type *Ty,
-                                            ScalarEvolution *SE) {
-  const Loop *L = AR->getLoop();
-  const SCEV *Start = AR->getStart();
-  const SCEV *Step = AR->getStepRecurrence(*SE);
-
-  // Check for a simple looking step prior to loop entry.
-  const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
-  if (!SA)
-    return nullptr;
-
-  // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
-  // subtraction is expensive. For this purpose, perform a quick and dirty
-  // difference, by checking for Step in the operand list.
-  SmallVector<const SCEV *, 4> DiffOps;
-  for (const SCEV *Op : SA->operands())
-    if (Op != Step)
-      DiffOps.push_back(Op);
-
-  if (DiffOps.size() == SA->getNumOperands())
-    return nullptr;
-
-  // This is a postinc AR. Check for overflow on the preinc recurrence using the
-  // same three conditions that getSignExtendedExpr checks.
-
-  // 1. NSW flags on the step increment.
-  const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
-  const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
-      SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
-
-  // WARNING: FIXME: the optimization below assumes that a sign-overflowing nsw
-  // operation is undefined behavior. This is strictly more aggressive than the
-  // interpretation of nsw in other parts of LLVM (for instance, they may
-  // unconditionally hoist nsw arithmetic through control flow). This logic
-  // needs to be revisited once we have a consistent semantics for poison
-  // values.
-  //
-  // "{S,+,X} is <nsw>" and "{S,+,X} is evaluated at least once" implies "S+X
-  // does not sign-overflow" (we'd have undefined behavior if it did). If
-  // `L->getExitingBlock() == L->getLoopLatch()` then `PreAR` (= {S,+,X}<nsw>)
-  // is evaluated every-time `AR` (= {S+X,+,X}) is evaluated, and hence within
-  // `AR` we are safe to assume that "S+X" will not sign-overflow.
-  //
-
-  BasicBlock *ExitingBlock = L->getExitingBlock();
-  BasicBlock *LatchBlock = L->getLoopLatch();
-  if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW) &&
-      ExitingBlock != nullptr && ExitingBlock == LatchBlock)
-    return PreStart;
-
-  // 2. Direct overflow check on the step operation's expression.
-  unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
-  Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
-  const SCEV *OperandExtendedStart =
-      SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy),
-                     SE->getSignExtendExpr(Step, WideTy));
-  if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) {
-    // Cache knowledge of PreAR NSW.
-    if (PreAR && AR->getNoWrapFlags(SCEV::FlagNSW))
-      const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW);
-    // FIXME: this optimization needs a unit test
-    DEBUG(dbgs() << "SCEV: untested prestart overflow check\n");
-    return PreStart;
-  }
-
-  // 3. Loop precondition.
-  ICmpInst::Predicate Pred;
-  const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE);
-
-  if (OverflowLimit &&
-      SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
-    return PreStart;
-  }
-  return nullptr;
-}
-
-// Get the normalized sign-extended expression for this AddRec's Start.
-static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
-                                            Type *Ty,
-                                            ScalarEvolution *SE) {
-  const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE);
-  if (!PreStart)
-    return SE->getSignExtendExpr(AR->getStart(), Ty);
-
-  return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty),
-                        SE->getSignExtendExpr(PreStart, Ty));
-}
-
 const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty) {
   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
@@ -1500,9 +1574,9 @@
       // If we have special knowledge that this addrec won't overflow,
      // we don't need to do any further analysis.
      if (AR->getNoWrapFlags(SCEV::FlagNSW))
-        return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
-                             getSignExtendExpr(Step, Ty),
-                             L, SCEV::FlagNSW);
+        return getAddRecExpr(
+            getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+            getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW);
 
       // Check whether the backedge-taken count is SCEVCouldNotCompute.
       // Note that this serves two purposes: It filters out loops that are
@@ -1539,9 +1613,9 @@
           // Cache knowledge of AR NSW, which is propagated to this AddRec.
           const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
           // Return the expression with the addrec on the outside.
-          return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
-                               getSignExtendExpr(Step, Ty),
-                               L, AR->getNoWrapFlags());
+          return getAddRecExpr(
+              getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+              getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
         }
         // Similar to above, only this time treat the step value as unsigned.
         // This covers loops that count up with an unsigned step.
@@ -1561,9 +1635,9 @@
           const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
           // Return the expression with the addrec on the outside.
-          return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
-                               getZeroExtendExpr(Step, Ty),
-                               L, AR->getNoWrapFlags());
+          return getAddRecExpr(
+              getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+              getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
         }
       }
 
@@ -1572,7 +1646,8 @@
       // with the start value and the backedge is guarded by a comparison
       // with the post-inc value, the addrec is safe.
       ICmpInst::Predicate Pred;
-      const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this);
+      const SCEV *OverflowLimit =
+          getSignedOverflowLimitForStep(Step, &Pred, this);
       if (OverflowLimit &&
           (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
            (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
@@ -1580,9 +1655,9 @@
                                        OverflowLimit)))) {
         // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
         const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
-        return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
-                             getSignExtendExpr(Step, Ty),
-                             L, AR->getNoWrapFlags());
+        return getAddRecExpr(
+            getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+            getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
       }
     }
 
     // If Start and Step are constants, check if we can apply this
Index: test/Analysis/ScalarEvolution/infer-prestart-no-wrap.ll
===================================================================
--- /dev/null
+++ test/Analysis/ScalarEvolution/infer-prestart-no-wrap.ll
@@ -0,0 +1,97 @@
+; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
+
+define void @infer.sext.0(i1* %c, i32 %start) {
+; CHECK-LABEL: Classifying expressions for: @infer.sext.0
+ entry:
+  br label %loop
+
+ loop:
+  %idx = phi i32 [ %start, %entry ], [ %idx.inc, %loop ]
+  %idx.inc = add nsw i32 %idx, 1
+  %idx.inc.sext = sext i32 %idx.inc to i64
+; CHECK: %idx.inc.sext = sext i32 %idx.inc to i64
+; CHECK-NEXT: --> {(1 + (sext i32 %start to i64)),+,1}<%loop>
+  %condition = load volatile i1* %c
+  br i1 %condition, label %exit, label %loop
+
+ exit:
+  ret void
+}
+
+define void @infer.zext.0(i1* %c, i32 %start) {
+; CHECK-LABEL: Classifying expressions for: @infer.zext.0
+ entry:
+  br label %loop
+
+ loop:
+  %idx = phi i32 [ %start, %entry ], [ %idx.inc, %loop ]
+  %idx.inc = add nuw i32 %idx, 1
+  %idx.inc.sext = zext i32 %idx.inc to i64
+; CHECK: %idx.inc.sext = zext i32 %idx.inc to i64
+; CHECK-NEXT: --> {(1 + (zext i32 %start to i64)),+,1}<%loop>
+  %condition = load volatile i1* %c
+  br i1 %condition, label %exit, label %loop
+
+ exit:
+  ret void
+}
+
+define void @infer.sext.1(i32 %start, i1* %c) {
+; CHECK-LABEL: Classifying expressions for: @infer.sext.1
+ entry:
+  %start.mul = mul i32 %start, 4
+  %start.real = add i32 %start.mul, 2
+  br label %loop
+
+ loop:
+  %idx = phi i32 [ %start.real, %entry ], [ %idx.inc, %loop ]
+  %idx.sext = sext i32 %idx to i64
+; CHECK: %idx.sext = sext i32 %idx to i64
+; CHECK-NEXT: --> {(2 + (sext i32 (4 * %start) to i64)),+,2}<%loop>
+  %idx.inc = add nsw i32 %idx, 2
+  %condition = load i1* %c
+  br i1 %condition, label %exit, label %loop
+
+ exit:
+  ret void
+}
+
+define void @infer.sext.2(i1* %c, i8 %start) {
+; CHECK-LABEL: Classifying expressions for: @infer.sext.2
+ entry:
+  %start.inc = add i8 %start, 1
+  %entry.condition = icmp slt i8 %start, 127
+  br i1 %entry.condition, label %loop, label %exit
+
+ loop:
+  %idx = phi i8 [ %start.inc, %entry ], [ %idx.inc, %loop ]
+  %idx.sext = sext i8 %idx to i16
+; CHECK: %idx.sext = sext i8 %idx to i16
+; CHECK-NEXT: --> {(1 + (sext i8 %start to i16)),+,1}<%loop>
+  %idx.inc = add nsw i8 %idx, 1
+  %condition = load volatile i1* %c
+  br i1 %condition, label %exit, label %loop
+
+ exit:
+  ret void
+}
+
+define void @infer.zext.1(i1* %c, i8 %start) {
+; CHECK-LABEL: Classifying expressions for: @infer.zext.1
+ entry:
+  %start.inc = add i8 %start, 1
+  %entry.condition = icmp ult i8 %start, 255
+  br i1 %entry.condition, label %loop, label %exit
+
+ loop:
+  %idx = phi i8 [ %start.inc, %entry ], [ %idx.inc, %loop ]
+  %idx.zext = zext i8 %idx to i16
+; CHECK: %idx.zext = zext i8 %idx to i16
+; CHECK-NEXT: --> {(1 + (zext i8 %start to i16)),+,1}<%loop>
+  %idx.inc = add nuw i8 %idx, 1
+  %condition = load volatile i1* %c
+  br i1 %condition, label %exit, label %loop
+
+ exit:
+  ret void
+}
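
Editor's note (illustration only, not part of the patch): the patch folds getPreStartForSignExtend into a single getPreStartForExtend by moving every sign/zero difference behind the ExtendOpTraits<ExtendOpTy> specializations: the wrap flag to prove (FlagNSW vs. FlagNUW), the member-function pointer used to extend (getSignExtendExpr vs. getZeroExtendExpr), and the overflow-limit helper. The sketch below shows the same traits pattern in a minimal, self-contained form; every name in it (SignExt, ZeroExt, MyExtendOpTraits, extendedStart) is hypothetical and only stands in for the corresponding pieces of the patch.

// All names below are hypothetical; this is not LLVM code.
#include <cstdint>
#include <cstdio>

struct SignExt {}; // stands in for SCEVSignExtendExpr
struct ZeroExt {}; // stands in for SCEVZeroExtendExpr

template <typename ExtendOpTy> struct MyExtendOpTraits;

template <> struct MyExtendOpTraits<SignExt> {
  // Analogue of WrapType = SCEV::FlagNSW.
  static const char *wrapName() { return "nsw"; }
  // Analogue of GetExtendExpr = &ScalarEvolution::getSignExtendExpr.
  static std::int64_t extend(std::int32_t X) {
    return static_cast<std::int64_t>(X); // sign-extend
  }
};

template <> struct MyExtendOpTraits<ZeroExt> {
  // Analogue of WrapType = SCEV::FlagNUW.
  static const char *wrapName() { return "nuw"; }
  // Analogue of GetExtendExpr = &ScalarEvolution::getZeroExtendExpr.
  static std::int64_t extend(std::int32_t X) {
    return static_cast<std::int64_t>(static_cast<std::uint32_t>(X)); // zero-extend
  }
};

// One templated body serves both extension kinds, mirroring how
// getExtendAddRecStart<ExtendOpTy> computes ext(PreStart) + ext(Step).
template <typename ExtendOpTy>
std::int64_t extendedStart(std::int32_t PreStart, std::int32_t Step) {
  return MyExtendOpTraits<ExtendOpTy>::extend(PreStart) +
         MyExtendOpTraits<ExtendOpTy>::extend(Step);
}

int main() {
  std::printf("%s start: %lld\n", MyExtendOpTraits<SignExt>::wrapName(),
              static_cast<long long>(extendedStart<SignExt>(-2, 1)));
  std::printf("%s start: %lld\n", MyExtendOpTraits<ZeroExt>::wrapName(),
              static_cast<long long>(extendedStart<ZeroExt>(-2, 1)));
  return 0;
}

Under these stand-in definitions the signed instantiation prints -1 while the unsigned one prints 4294967295; that difference is exactly what the WrapType/GetExtendExpr/getOverflowLimitForStep trio encapsulates so that getPreStartForExtend itself needs no per-extension branches.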