Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -4414,7 +4414,7 @@ // varying inside the loop. if (!isLoopInvariant(Accum, L)) return None; - + // *** Part2: Create the predicates // Analysis was successful: we have a phi-with-cast pattern for which we @@ -4464,27 +4464,70 @@ // // By induction, the same applies to all iterations 1<=i(PHISCEV); - - SCEVWrapPredicate::IncrementWrapFlags AddedFlags = - Signed ? SCEVWrapPredicate::IncrementNSSW - : SCEVWrapPredicate::IncrementNUSW; - const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags); - Predicates.push_back(AddRecPred); + getTruncateExpr(Accum, TruncTy), L, SCEV::FlagAnyWrap); + + // PHISCEV can be either a SCEVConstant or a SCEVAddRecExpr. + // ex: If truncated Accum is 0 and StartVal is a constant, then PHISCEV + // will be constant. + // If PHISCEV is a constant, then P1 degenerates into P2 or P3, so we don't + // add + // P1. + if (const auto *AR = dyn_cast(PHISCEV)) { + SCEVWrapPredicate::IncrementWrapFlags AddedFlags = + Signed ? SCEVWrapPredicate::IncrementNSSW + : SCEVWrapPredicate::IncrementNUSW; + const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags); + Predicates.push_back(AddRecPred); + } // Create the Equal Predicates P2,P3: - auto AppendPredicate = [&](const SCEV *Expr) -> void { + + // It is possible that the predicates P2 and/or P3 are computable at + // compile time due to StartVal and/or Accum being constants. + // If either one is, then we can check that now and escape if either P2 + // or P3 is false. 
+
+  // Construct the extended SCEV: (Ext ix (Trunc iy (Expr) to ix) to iy)
+  // for each of StartVal and Accum
+  auto GetExtendedExpr = [&](const SCEV *Expr) -> const SCEV * {
     assert(isLoopInvariant(Expr, L) && "Expr is expected to be invariant");
     const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy);
     const SCEV *ExtendedExpr =
         Signed ? getSignExtendExpr(TruncatedExpr, Expr->getType())
                : getZeroExtendExpr(TruncatedExpr, Expr->getType());
+    return ExtendedExpr;
+  };
+
+  // Given ExtendedExpr = GetExtendedExpr(Expr), i.e.
+  //   (Ext ix (Trunc iy (Expr) to ix) to iy),
+  // return true when the equality predicate Expr == ExtendedExpr is known
+  // to be false at compile time. The callers below then return None
+  // instead of emitting a runtime predicate that can never hold.
+  auto PredIsKnownFalse = [&](const SCEV *Expr,
+                              const SCEV *ExtendedExpr) -> bool {
+    return Expr != ExtendedExpr &&
+           isKnownPredicate(ICmpInst::ICMP_NE, Expr, ExtendedExpr);
+  };
+
+  const SCEV *StartExtended = GetExtendedExpr(StartVal);
+  if (PredIsKnownFalse(StartVal, StartExtended)) {
+    DEBUG(dbgs() << "P2 is compile-time false\n";);
+    return None;
+  }
+
+  const SCEV *AccumExtended = GetExtendedExpr(Accum);
+  if (PredIsKnownFalse(Accum, AccumExtended)) {
+    DEBUG(dbgs() << "P3 is compile-time false\n";);
+    return None;
+  }
+
+  auto AppendPredicate = [&](const SCEV *Expr,
+                             const SCEV *ExtendedExpr) -> void {
     if (Expr != ExtendedExpr &&
         !isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)) {
       const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr);
@@ -4492,10 +4535,10 @@
       Predicates.push_back(Pred);
     }
   };
-
-  AppendPredicate(StartVal);
-  AppendPredicate(Accum);
-
+
+  AppendPredicate(StartVal, StartExtended);
+  AppendPredicate(Accum, AccumExtended);
+
   // *** Part3: Predicates are ready. Now go ahead and create the new addrec in
   // which the casts had been folded away. 
The caller can rewrite SymbolicPHI
   // into NewAR if it will also add the runtime overflow checks specified in
Index: test/Analysis/ScalarEvolution/overflow-addrec.ll
===================================================================
--- /dev/null
+++ test/Analysis/ScalarEvolution/overflow-addrec.ll
@@ -0,0 +1,69 @@
+; RUN: opt -loop-vectorize -S < %s | FileCheck %s
+
+; These tests all check corner cases of SCEV's construction
+; of a SCEVAddRec from a loop iv in the form:
+; (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum
+;
+; They ensure that the SCEV construction neither asserts nor does anything
+; else that would crash the compiler.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The invariant accum is the i64 minimum: a constant whose low 32 bits are
+; all zero, so it cannot be truncated to 32 bits accurately.
+define void @const_overflow_accum() {   ; the CHECK lines expect the loop body to come out of opt unchanged
+; CHECK-LABEL: @const_overflow_accum
+
+bb:
+; CHECK: bb:
+; CHECK: br label %bb1
+  br label %bb1
+
+bb1:
+; CHECK: bb1:
+; CHECK: %tmp = phi i64 [ -7, %bb ], [ %tmp4, %bb1 ]
+; CHECK: %tmp2 = shl i64 %tmp, 32
+; CHECK: %tmp3 = ashr exact i64 %tmp2, 32
+; CHECK: %tmp4 = add i64 %tmp3, -9223372036854775808
+; CHECK: br
+  %tmp = phi i64 [ -7, %bb ], [ %tmp4, %bb1 ]
+  %tmp2 = shl i64 %tmp, 32   ; shl + ashr by 32: sign-extend the low 32 bits of %tmp
+  %tmp3 = ashr exact i64 %tmp2, 32
+  %tmp4 = add i64 %tmp3, -9223372036854775808   ; loop-invariant accum (i64 minimum)
+  br i1 undef, label %bb5, label %bb1   ; exit condition is undef
+
+bb5:
+; CHECK: bb5
+; CHECK: unreachable
+  unreachable
+}
+
+; The start value is the i64 minimum: a constant whose low 32 bits are all
+; zero, so it cannot be truncated to 32 bits accurately.
+define void @const_overflow_start() {   ; the CHECK lines expect the loop body to come out of opt unchanged
+; CHECK-LABEL: @const_overflow_start
+
+bb:
+; CHECK: bb:
+; CHECK: br label %bb1
+  br label %bb1
+
+bb1:
+; CHECK: bb1:
+; CHECK: %tmp = phi i64 [ -9223372036854775808, %bb ], [ %tmp4, %bb1 ]
+; CHECK: %tmp2 = shl i64 %tmp, 32
+; CHECK: %tmp3 = ashr exact i64 %tmp2, 32
+; CHECK: %tmp4 = add i64 %tmp3, -7
+; CHECK: br
+  %tmp = phi i64 [ -9223372036854775808, %bb ], [ %tmp4, %bb1 ]   ; start value is the i64 minimum; its low 32 bits are all zero
+  %tmp2 = shl i64 %tmp, 32   ; shl + ashr by 32: sign-extend the low 32 bits of %tmp
+  %tmp3 = ashr exact i64 %tmp2, 32
+  %tmp4 = add i64 %tmp3, -7   ; loop-invariant accum
+  br i1 undef, label %bb5, label %bb1   ; exit condition is undef
+
+bb5:
+; CHECK: bb5
+; CHECK: unreachable
+  unreachable
+}