Index: include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- include/llvm/Analysis/ScalarEvolution.h
+++ include/llvm/Analysis/ScalarEvolution.h
@@ -1761,6 +1761,14 @@
   /// would trigger undefined behavior on overflow.
   SCEV::NoWrapFlags getNoWrapFlagsFromUB(const Value *V);
 
+  /// Set the given no-wrap flags on \p AR and perform the related updates.
+  /// In particular, drop the cached ranges of this AR, because with no-wrap
+  /// flags they may be computed more precisely. This would not be needed if
+  /// the flags could be computed when the AddRec is created, but currently
+  /// that is not done (to avoid circular logic with trip count computation).
+  void setNoWrapFlagsToAddRecExpr(const SCEVAddRecExpr *AR,
+                                  SCEV::NoWrapFlags Flags);
+
   /// Return true if the SCEV corresponding to \p I is never poison. Proving
   /// this is more complex than proving that just \p I is never poison, since
   /// SCEV commons expressions across control flow, and you can have cases
Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -1621,7 +1621,7 @@
 
       if (!AR->hasNoUnsignedWrap()) {
         auto NewFlags = proveNoWrapViaConstantRanges(AR);
-        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(NewFlags);
+        setNoWrapFlagsToAddRecExpr(AR, NewFlags);
       }
 
       // If we have special knowledge that this addrec won't overflow,
@@ -1670,7 +1670,7 @@
                        SCEV::FlagAnyWrap, Depth + 1);
         if (ZAdd == OperandExtendedAdd) {
           // Cache knowledge of AR NUW, which is propagated to this AddRec.
-          const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+          setNoWrapFlagsToAddRecExpr(AR, SCEV::FlagNUW);
           // Return the expression with the addrec on the outside.
           return getAddRecExpr(
               getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
@@ -1689,7 +1689,7 @@
         if (ZAdd == OperandExtendedAdd) {
           // Cache knowledge of AR NW, which is propagated to this AddRec.
           // Negative step causes unsigned wrap, but it still can't self-wrap.
-          const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+          setNoWrapFlagsToAddRecExpr(AR, SCEV::FlagNW);
           // Return the expression with the addrec on the outside.
           return getAddRecExpr(
               getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
@@ -1721,7 +1721,7 @@
             isKnownOnEveryIteration(ICmpInst::ICMP_ULT, AR, N)) {
           // Cache knowledge of AR NUW, which is propagated to this
           // AddRec.
-          const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+          setNoWrapFlagsToAddRecExpr(AR, SCEV::FlagNUW);
           // Return the expression with the addrec on the outside.
           return getAddRecExpr(
               getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
@@ -1737,7 +1737,7 @@
           // Cache knowledge of AR NW, which is propagated to this
           // AddRec. Negative step causes unsigned wrap, but it
           // still can't self-wrap.
-          const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+          setNoWrapFlagsToAddRecExpr(AR, SCEV::FlagNW);
           // Return the expression with the addrec on the outside.
           return getAddRecExpr(
               getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
@@ -1749,7 +1749,7 @@
       }
 
       if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
-        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+        setNoWrapFlagsToAddRecExpr(AR, SCEV::FlagNUW);
         return getAddRecExpr(
             getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
             getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
@@ -1922,7 +1922,7 @@
 
       if (!AR->hasNoSignedWrap()) {
         auto NewFlags = proveNoWrapViaConstantRanges(AR);
-        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(NewFlags);
+        setNoWrapFlagsToAddRecExpr(AR, NewFlags);
       }
 
       // If we have special knowledge that this addrec won't overflow,
@@ -1971,7 +1971,7 @@
                        SCEV::FlagAnyWrap, Depth + 1);
         if (SAdd == OperandExtendedAdd) {
           // Cache knowledge of AR NSW, which is propagated to this AddRec.
-          const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+          setNoWrapFlagsToAddRecExpr(AR, SCEV::FlagNSW);
           // Return the expression with the addrec on the outside.
           return getAddRecExpr(
               getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
@@ -1996,7 +1996,7 @@
           // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=>
           // (SAdd == OperandExtendedAdd => AR is NW)
 
-          const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+          setNoWrapFlagsToAddRecExpr(AR, SCEV::FlagNW);
 
           // Return the expression with the addrec on the outside.
           return getAddRecExpr(
@@ -2030,7 +2030,7 @@
           (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
            isKnownOnEveryIteration(Pred, AR, OverflowLimit))) {
         // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
-        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+        setNoWrapFlagsToAddRecExpr(AR, SCEV::FlagNSW);
         return getAddRecExpr(
             getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
             getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
@@ -2056,7 +2056,7 @@
       }
 
       if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
-        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+        setNoWrapFlagsToAddRecExpr(AR, SCEV::FlagNSW);
         return getAddRecExpr(
             getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
             getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
@@ -5891,6 +5891,15 @@
   return isSCEVExprNeverPoison(BinOp) ? Flags : SCEV::FlagAnyWrap;
 }
 
+void ScalarEvolution::setNoWrapFlagsToAddRecExpr(const SCEVAddRecExpr *AR,
+                                                 SCEV::NoWrapFlags Flags) {
+  // Bail if this AR already has these flags.
+  if (AR->getNoWrapFlags(Flags) == Flags)
+    return;
+  const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(Flags);
+  forgetMemoizedResults(AR);
+}
+
 bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) {
   // Here we check that I is in the header of the innermost loop containing I,
   // since we only deal with instructions in the loop header. The actual loop we
@@ -6225,8 +6234,7 @@
         // transfer the no-wrap flags, since an or won't introduce a wrap.
         if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
           const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
-          const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags(
-              OldAR->getNoWrapFlags());
+          setNoWrapFlagsToAddRecExpr(NewAR, OldAR->getNoWrapFlags());
         }
         return S;
       }
Index: test/Transforms/IndVarSimplify/full_widening.ll
===================================================================
--- test/Transforms/IndVarSimplify/full_widening.ll
+++ test/Transforms/IndVarSimplify/full_widening.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; Make sure that we do not insert a trunc in the loop.
+define i32 @test_01(double* %p, double %x, i32* %np, i32* %mp, i32 %k) {
+; CHECK-LABEL: @test_01(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[K:%.*]] to i64
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV_WIDE:%.*]] = phi i64 [ [[CANONICAL_IV_NEXT_I:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[CANONICAL_IV_NEXT_I]] = add nuw nsw i64 [[IV_WIDE]], 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 [[IV_WIDE]]
+; CHECK-NEXT: [[LOAD:%.*]] = load atomic double, double* [[GEP]] unordered, align 8
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[X:%.*]], [[LOAD]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds double, double* [[P]], i64 [[IV_WIDE]]
+; CHECK-NEXT: store atomic double [[MUL]], double* [[GEP2]] unordered, align 8
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i64 [[CANONICAL_IV_NEXT_I]], [[TMP0]]
+; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+  br label %loop
+
+loop:
+  %iv.wide = phi i64 [ %canonical.iv.next.i, %loop ], [ 0, %entry ]
+  %iv.narrow = phi i32 [ %iv.narrow.next, %loop ], [ 0, %entry ]
+  %canonical.iv.next.i = add nuw nsw i64 %iv.wide, 1
+  %zext = zext i32 %iv.narrow to i64
+  %gep = getelementptr inbounds double, double* %p, i64 %zext
+  %load = load atomic double, double* %gep unordered, align 8
+  %mul = fmul double %x, %load
+  %gep2 = getelementptr inbounds double, double* %p, i64 %zext
+  store atomic double %mul, double* %gep2 unordered, align 8
+  %iv.narrow.next = add nuw nsw i32 %iv.narrow, 1
+  %loop.cond = icmp slt i32 %iv.narrow.next, %k
+  br i1 %loop.cond, label %loop, label %exit
+
+exit:
+  ret i32 0
+}
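
Outside the patch itself, a minimal, self-contained C++ sketch of the invalidation pattern that setNoWrapFlagsToAddRecExpr implements: once an expression's no-wrap flags are upgraded, a range that was memoized for it before the upgrade may be over-conservative, so the cached entry has to be dropped and recomputed. This is not LLVM code; the names Expr, RangeCache, getUpperBound and setFlagsAndInvalidate are hypothetical and stand in for the SCEVAddRecExpr, the range cache cleared by forgetMemoizedResults, getRangeRef, and the new helper, respectively.

    // Illustration only: a toy add-rec {Start,+,Step} whose cached upper
    // bound depends on whether a no-wrap flag is known.
    #include <cassert>
    #include <cstdint>
    #include <unordered_map>

    struct Expr {
      unsigned Flags = 0;           // analogous to SCEV::NoWrapFlags
      uint64_t Start = 0, Step = 1; // toy add-rec {Start,+,Step}
    };

    struct RangeCache {
      std::unordered_map<const Expr *, uint64_t> UpperBound;

      uint64_t getUpperBound(const Expr *E, uint64_t TripCount) {
        auto It = UpperBound.find(E);
        if (It != UpperBound.end())
          return It->second; // memoized, possibly computed with weaker flags
        // With a no-wrap flag the exact last value can be used; otherwise
        // fall back to the maximally conservative answer.
        uint64_t R =
            (E->Flags & 1) ? E->Start + E->Step * TripCount : UINT64_MAX;
        return UpperBound[E] = R;
      }

      // Mirrors setNoWrapFlagsToAddRecExpr: skip the work if the flags are
      // already present, otherwise set them and forget the memoized result.
      void setFlagsAndInvalidate(Expr *E, unsigned NewFlags) {
        if ((E->Flags & NewFlags) == NewFlags)
          return;
        E->Flags |= NewFlags;
        UpperBound.erase(E); // forgetMemoizedResults(AR) in the real patch
      }
    };

    int main() {
      Expr AR;
      RangeCache SE;
      assert(SE.getUpperBound(&AR, 10) == UINT64_MAX); // cached, conservative
      SE.setFlagsAndInvalidate(&AR, /*NUW*/ 1);        // drops the stale entry
      assert(SE.getUpperBound(&AR, 10) == 10);         // recomputed, precise
      return 0;
    }

The early "already has these flags" bail-out in the sketch mirrors the check at the top of setNoWrapFlagsToAddRecExpr: when nothing actually changes, the cached data stays valid and there is no need to churn memoized results.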