Index: llvm/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/lib/Analysis/ScalarEvolution.cpp +++ llvm/lib/Analysis/ScalarEvolution.cpp @@ -2781,7 +2781,8 @@ // If we found some loop invariants, fold them into the recurrence. if (!LIOps.empty()) { // Compute nowrap flags for the addition of the loop-invariant ops and - // the addrec. Temporarily push it as an operand for that purpose. + // the addrec. Temporarily push it as an operand for that purpose. These + // flags are valid in the scope of the addrec only. LIOps.push_back(AddRec); SCEV::NoWrapFlags Flags = ComputeFlags(LIOps); LIOps.pop_back(); @@ -2790,10 +2791,48 @@ LIOps.push_back(AddRec->getStart()); SmallVector AddRecOps(AddRec->operands()); - // This follows from the fact that the no-wrap flags on the outer add - // expression are applicable on the 0th iteration, when the add recurrence - // will be equal to its start value. - AddRecOps[0] = getAddExpr(LIOps, Flags, Depth + 1); + + // It is not in general safe to propagate flags valid on an add within + // the addrec scope to one outside it. We must prove that the inner + // scope is guaranteed to execute if the outer one does to be able to + // safely propagate. We know the program is undefined if poison is + // produced on the inner scoped addrec. We also know that *for this use* + // the outer scoped add can't overflow (because of the flags we just + // computed for the inner scoped add) without the program being undefined. + // Proving that entry to the outer scope neccesitates entry to the inner + // scope, thus proves the program undefined if the flags would be violated + // in the outer scope. + auto AddFlags = SCEV::FlagAnyWrap; + bool CanPropagateFlags = [&]() { + auto *SinglePred = AddRecLoop->getLoopPreheader(); + if (!SinglePred || !SinglePred->getSingleSuccessor()) + return false; + // We only handle the case where the outer scope is simply the loop + // preheader for the moment. This could be generalized significantly + // but we'd need to cache the expensive guaranteed-to-execute walk. + bool FoundScope = llvm::any_of(LIOps, [&](const SCEV *S) { + if (auto *LIAddRec = dyn_cast(S)) { + if (LIAddRec->getLoop()->getHeader() == SinglePred) + return true; + } else if (auto *U = dyn_cast(S)) { + if (auto *I = dyn_cast(U->getValue())) + // TODO: We could return the exact point of definition for + // SCEVUnknowns which would let us handle blocks with an unknown + // defined after a point which might e.g. throw. + return I->getParent() == SinglePred; + return SinglePred == &SinglePred->getParent()->getEntryBlock(); + } else if (isa(S)) { + return SinglePred == &SinglePred->getParent()->getEntryBlock(); + } + // TODO: We could recurse through other SCEVs here if warranted. + return false; + }); + return (FoundScope && + isGuaranteedToTransferExecutionToSuccessor(SinglePred)); + }(); + if (CanPropagateFlags) + AddFlags = Flags; + AddRecOps[0] = getAddExpr(LIOps, AddFlags, Depth + 1); // Build the new addrec. Propagate the NUW and NSW flags if both the // outer add and the inner addrec are guaranteed to have no overflow. Index: llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll =================================================================== --- llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll +++ llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll @@ -1051,7 +1051,7 @@ ; CHECK-NEXT: %index32 = sub nsw i32 %i, %sub ; CHECK-NEXT: --> {((-1 * %sub) + %start),+,1}<%loop> U: full-set S: full-set Exits: (-1 + (-1 * %sub) + %numIterations) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index64 = sext i32 %index32 to i64 -; CHECK-NEXT: --> {((sext i32 %start to i64) + (-1 * (sext i32 %sub to i64))),+,1}<%loop> U: [-4294967295,8589934591) S: [-4294967295,8589934591) Exits: ((zext i32 (-1 + (-1 * %start) + %numIterations) to i64) + (sext i32 %start to i64) + (-1 * (sext i32 %sub to i64))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {((sext i32 %start to i64) + (-1 * (sext i32 %sub to i64))),+,1}<%loop> U: [-4294967295,8589934591) S: [-4294967295,8589934591) Exits: ((zext i32 (-1 + (-1 * %start) + %numIterations) to i64) + (sext i32 %start to i64) + (-1 * (sext i32 %sub to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %ptr = getelementptr inbounds float, float* %input, i64 %index64 ; CHECK-NEXT: --> {((4 * (sext i32 %start to i64)) + (-4 * (sext i32 %sub to i64)) + %input),+,4}<%loop> U: full-set S: full-set Exits: ((4 * (zext i32 (-1 + (-1 * %start) + %numIterations) to i64)) + (4 * (sext i32 %start to i64)) + (-4 * (sext i32 %sub to i64)) + %input) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %nexti = add nsw i32 %i, 1 @@ -1092,7 +1092,7 @@ ; CHECK-NEXT: %index32 = sub nsw i32 %i, %halfsub ; CHECK-NEXT: --> {((-1 * %halfsub) + %start),+,1}<%loop> U: full-set S: full-set Exits: (-1 + (-1 * %halfsub) + %numIterations) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index64 = sext i32 %index32 to i64 -; CHECK-NEXT: --> {((sext i32 %start to i64) + (-1 * (sext i32 %halfsub to i64))),+,1}<%loop> U: [-3221225471,7516192767) S: [-3221225471,7516192767) Exits: ((zext i32 (-1 + (-1 * %start) + %numIterations) to i64) + (sext i32 %start to i64) + (-1 * (sext i32 %halfsub to i64))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {((sext i32 %start to i64) + (-1 * (sext i32 %halfsub to i64))),+,1}<%loop> U: [-3221225471,7516192767) S: [-3221225471,7516192767) Exits: ((zext i32 (-1 + (-1 * %start) + %numIterations) to i64) + (sext i32 %start to i64) + (-1 * (sext i32 %halfsub to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %ptr = getelementptr inbounds float, float* %input, i64 %index64 ; CHECK-NEXT: --> {((4 * (sext i32 %start to i64)) + (-4 * (sext i32 %halfsub to i64)) + %input),+,4}<%loop> U: full-set S: full-set Exits: ((4 * (zext i32 (-1 + (-1 * %start) + %numIterations) to i64)) + (4 * (sext i32 %start to i64)) + (-4 * (sext i32 %halfsub to i64)) + %input) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %nexti = add nsw i32 %i, 1 Index: llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll =================================================================== --- llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll +++ llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll @@ -21,7 +21,7 @@ ; CHECK-NEXT: %idxprom20 = zext i32 %storemerge1921 to i64 ; CHECK-NEXT: --> {3,+,4294967295}<%for.cond6> U: [3,4) S: [3,4) Exits: <> LoopDispositions: { %for.cond6: Computable, %outer.loop: Variant } ; CHECK-NEXT: %arrayidx7 = getelementptr inbounds [1 x [4 x i16]], [1 x [4 x i16]]* @__const.f.g, i64 0, i64 0, i64 %idxprom20 -; CHECK-NEXT: --> {(6 + @__const.f.g),+,8589934590}<%for.cond6> U: [6,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %for.cond6: Computable, %outer.loop: Variant } +; CHECK-NEXT: --> {(6 + @__const.f.g),+,8589934590}<%for.cond6> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %for.cond6: Computable, %outer.loop: Variant } ; CHECK-NEXT: %i = load i16, i16* %arrayidx7, align 2 ; CHECK-NEXT: --> %i U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond6: Variant, %outer.loop: Variant } ; CHECK-NEXT: %storemerge1822.lcssa.ph = phi i32 [ 0, %for.cond6 ] @@ -45,7 +45,7 @@ ; CHECK-NEXT: %idxprom20.3 = zext i32 %storemerge1921.3 to i64 ; CHECK-NEXT: --> {3,+,4294967295}<%inner.loop> U: [3,4) S: [3,4) Exits: <> LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant } ; CHECK-NEXT: %arrayidx7.3 = getelementptr inbounds [1 x [4 x i16]], [1 x [4 x i16]]* @__const.f.g, i64 0, i64 0, i64 %idxprom20.3 -; CHECK-NEXT: --> {(6 + @__const.f.g),+,8589934590}<%inner.loop> U: [6,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant } +; CHECK-NEXT: --> {(6 + @__const.f.g),+,8589934590}<%inner.loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant } ; CHECK-NEXT: %i7 = load i16, i16* %arrayidx7.3, align 2 ; CHECK-NEXT: --> %i7 U: full-set S: full-set Exits: <> LoopDispositions: { %inner.loop: Variant, %outer.loop: Variant } ; CHECK-NEXT: %i8 = load volatile i32, i32* @b, align 4 Index: llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll =================================================================== --- llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll +++ llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll @@ -911,7 +911,7 @@ ; CHECK-NEXT: %lastout.2271 = phi i8* [ %incdec.ptr126, %while.body125 ], [ %ptr, %while.end117 ] ; CHECK-NEXT: --> {%ptr,+,1}<%while.body125> U: full-set S: full-set Exits: {(-2 + (-1 * (ptrtoint i8* %ptr to i64)) + %ptr),+,-1}<%while.cond111> LoopDispositions: { %while.body125: Computable } ; CHECK-NEXT: %incdec.ptr126 = getelementptr inbounds i8, i8* %lastout.2271, i64 1 -; CHECK-NEXT: --> {(1 + %ptr),+,1}<%while.body125> U: [1,0) S: [1,0) Exits: {(-1 + (-1 * (ptrtoint i8* %ptr to i64)) + %ptr),+,-1}<%while.cond111> LoopDispositions: { %while.body125: Computable } +; CHECK-NEXT: --> {(1 + %ptr),+,1}<%while.body125> U: full-set S: full-set Exits: {(-1 + (-1 * (ptrtoint i8* %ptr to i64)) + %ptr),+,-1}<%while.cond111> LoopDispositions: { %while.body125: Computable } ; CHECK-NEXT: Determining loop execution counts for: @crash ; CHECK-NEXT: Loop %while.body125: backedge-taken count is {(-2 + (-1 * (ptrtoint i8* %ptr to i64))),+,-1}<%while.cond111> ; CHECK-NEXT: Loop %while.body125: max backedge-taken count is -2 Index: llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll =================================================================== --- llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll +++ llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll @@ -127,13 +127,13 @@ ; CHECK-NEXT: %init = getelementptr inbounds i32, i32* %startptr, i64 2000 ; CHECK-NEXT: --> (8000 + %startptr) U: [8000,0) S: [8000,0) ; CHECK-NEXT: %iv = phi i32* [ %init, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {(8000 + %startptr),+,4}<%loop> U: [8000,0) S: [8000,0) Exits: (8000 + (4 * ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + (ptrtoint i32* %endptr to i64)) /u 4)) + %startptr) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(8000 + %startptr),+,4}<%loop> U: [8000,0) S: [8000,0) Exits: (8000 + (4 * ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + ((8004 + (ptrtoint i32* %startptr to i64)) umax (ptrtoint i32* %endptr to i64))) /u 4)) + %startptr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = getelementptr inbounds i32, i32* %iv, i64 1 -; CHECK-NEXT: --> {(8004 + %startptr),+,4}<%loop> U: [8004,0) S: [8004,0) Exits: (8004 + (4 * ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + (ptrtoint i32* %endptr to i64)) /u 4)) + %startptr) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(8004 + %startptr),+,4}<%loop> U: full-set S: full-set Exits: (8004 + (4 * ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + ((8004 + (ptrtoint i32* %startptr to i64)) umax (ptrtoint i32* %endptr to i64))) /u 4)) + %startptr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @pointer_iv_nowrap_guard -; CHECK-NEXT: Loop %loop: backedge-taken count is ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + (ptrtoint i32* %endptr to i64)) /u 4) -; CHECK-NEXT: Loop %loop: max backedge-taken count is 4611686018427385902 -; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + (ptrtoint i32* %endptr to i64)) /u 4) +; CHECK-NEXT: Loop %loop: backedge-taken count is ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + ((8004 + (ptrtoint i32* %startptr to i64)) umax (ptrtoint i32* %endptr to i64))) /u 4) +; CHECK-NEXT: Loop %loop: max backedge-taken count is 4611686018427387903 +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + ((8004 + (ptrtoint i32* %startptr to i64)) umax (ptrtoint i32* %endptr to i64))) /u 4) ; CHECK-NEXT: Predicates: ; CHECK: Loop %loop: Trip multiple is 1 ; Index: llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll =================================================================== --- llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll +++ llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll @@ -30,13 +30,13 @@ ; CHECK-NEXT: %8 = sext i32 %7 to i64 ; CHECK-NEXT: --> {1,+,2}<%bb> U: [1,2147483646) S: [1,2147483646) Exits: (1 + (2 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2))) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %9 = getelementptr inbounds double, double* %q, i64 %8 -; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: [8,0) S: [8,0) Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } +; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: full-set S: full-set Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %t7 = add nsw i32 %i.01, 1 ; CHECK-NEXT: --> {1,+,2}<%bb> U: [1,2147483646) S: [1,2147483646) Exits: (1 + (2 * ((-1 + (2 * (%no /u 2))) /u 2))) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %t8 = sext i32 %t7 to i64 ; CHECK-NEXT: --> {1,+,2}<%bb> U: [1,2147483646) S: [1,2147483646) Exits: (1 + (2 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2))) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %t9 = getelementptr inbounds double, double* %q, i64 %t8 -; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: [8,0) S: [8,0) Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } +; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: full-set S: full-set Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %14 = sext i32 %i.01 to i64 ; CHECK-NEXT: --> {0,+,2}<%bb> U: [0,2147483645) S: [0,2147483645) Exits: (2 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %15 = getelementptr inbounds double, double* %d, i64 %14 Index: llvm/test/Analysis/ScalarEvolution/nsw-offset.ll =================================================================== --- llvm/test/Analysis/ScalarEvolution/nsw-offset.ll +++ llvm/test/Analysis/ScalarEvolution/nsw-offset.ll @@ -27,13 +27,13 @@ ; CHECK-NEXT: %8 = sext i32 %7 to i64 ; CHECK-NEXT: --> {1,+,2}<%bb> U: [1,2147483646) S: [1,2147483646) Exits: (1 + (2 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2))) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %9 = getelementptr inbounds double, double* %q, i64 %8 -; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: [8,0) S: [8,0) Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } +; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: full-set S: full-set Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %t7 = add nsw i32 %i.01, 1 ; CHECK-NEXT: --> {1,+,2}<%bb> U: [1,2147483646) S: [1,2147483646) Exits: (1 + (2 * ((-1 + (2 * (%no /u 2))) /u 2))) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %t8 = sext i32 %t7 to i64 ; CHECK-NEXT: --> {1,+,2}<%bb> U: [1,2147483646) S: [1,2147483646) Exits: (1 + (2 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2))) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %t9 = getelementptr inbounds double, double* %q, i64 %t8 -; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: [8,0) S: [8,0) Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } +; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: full-set S: full-set Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %14 = sext i32 %i.01 to i64 ; CHECK-NEXT: --> {0,+,2}<%bb> U: [0,2147483645) S: [0,2147483645) Exits: (2 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %15 = getelementptr inbounds double, double* %d, i64 %14 Index: llvm/test/Analysis/ScalarEvolution/nsw.ll =================================================================== --- llvm/test/Analysis/ScalarEvolution/nsw.ll +++ llvm/test/Analysis/ScalarEvolution/nsw.ll @@ -25,7 +25,7 @@ ; CHECK-NEXT: %phitmp = sext i32 %tmp8 to i64 ; CHECK-NEXT: --> {1,+,1}<%bb> U: [1,-9223372036854775808) S: [1,-9223372036854775808) Exits: <> LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %tmp9 = getelementptr inbounds double, double* %p, i64 %phitmp -; CHECK-NEXT: --> {(8 + %p),+,8}<%bb> U: [8,0) S: [8,0) Exits: <> LoopDispositions: { %bb: Computable } +; CHECK-NEXT: --> {(8 + %p),+,8}<%bb> U: full-set S: full-set Exits: <> LoopDispositions: { %bb: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test1 ; CHECK-NEXT: Loop %bb: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %bb: Unpredictable max backedge-taken count. @@ -73,7 +73,7 @@ ; CHECK-NEXT: %__first.addr.02.i.i = phi i32* [ %begin, %for.body.lr.ph.i.i ], [ %ptrincdec.i.i, %for.body.i.i ] ; CHECK-NEXT: --> {%begin,+,4}<%for.body.i.i> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint i32* %begin to i64)) + (ptrtoint i32* %end to i64)) /u 4)) + %begin) LoopDispositions: { %for.body.i.i: Computable } ; CHECK-NEXT: %ptrincdec.i.i = getelementptr inbounds i32, i32* %__first.addr.02.i.i, i64 1 -; CHECK-NEXT: --> {(4 + %begin),+,4}<%for.body.i.i> U: [4,0) S: [4,0) Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint i32* %begin to i64)) + (ptrtoint i32* %end to i64)) /u 4)) + %begin) LoopDispositions: { %for.body.i.i: Computable } +; CHECK-NEXT: --> {(4 + %begin),+,4}<%for.body.i.i> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint i32* %begin to i64)) + (ptrtoint i32* %end to i64)) /u 4)) + %begin) LoopDispositions: { %for.body.i.i: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test2 ; CHECK-NEXT: Loop %for.body.i.i: backedge-taken count is ((-4 + (-1 * (ptrtoint i32* %begin to i64)) + (ptrtoint i32* %end to i64)) /u 4) ; CHECK-NEXT: Loop %for.body.i.i: max backedge-taken count is 4611686018427387903 @@ -111,7 +111,7 @@ ; CHECK-NEXT: %tmp = add nsw i64 %indvar.i.i, 1 ; CHECK-NEXT: --> {1,+,1}<%for.body.i.i> U: [1,4611686018427387905) S: [1,4611686018427387905) Exits: (1 + ((-4 + (-1 * (ptrtoint i32* %begin to i64)) + (ptrtoint i32* %end to i64)) /u 4)) LoopDispositions: { %for.body.i.i: Computable } ; CHECK-NEXT: %ptrincdec.i.i = getelementptr inbounds i32, i32* %begin, i64 %tmp -; CHECK-NEXT: --> {(4 + %begin),+,4}<%for.body.i.i> U: [4,0) S: [4,0) Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint i32* %begin to i64)) + (ptrtoint i32* %end to i64)) /u 4)) + %begin) LoopDispositions: { %for.body.i.i: Computable } +; CHECK-NEXT: --> {(4 + %begin),+,4}<%for.body.i.i> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint i32* %begin to i64)) + (ptrtoint i32* %end to i64)) /u 4)) + %begin) LoopDispositions: { %for.body.i.i: Computable } ; CHECK-NEXT: %__first.addr.08.i.i = getelementptr inbounds i32, i32* %begin, i64 %indvar.i.i ; CHECK-NEXT: --> {%begin,+,4}<%for.body.i.i> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint i32* %begin to i64)) + (ptrtoint i32* %end to i64)) /u 4)) + %begin) LoopDispositions: { %for.body.i.i: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test3 Index: llvm/test/Analysis/ScalarEvolution/ptrtoint.ll =================================================================== --- llvm/test/Analysis/ScalarEvolution/ptrtoint.ll +++ llvm/test/Analysis/ScalarEvolution/ptrtoint.ll @@ -392,7 +392,7 @@ ; X64-NEXT: %i13 = add i8 %i12, %i8 ; X64-NEXT: --> (%i12 + %i8) U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i14 = getelementptr inbounds i8, i8* %i7, i64 1 -; X64-NEXT: --> {(1 + %arg),+,1}<%bb6> U: [1,0) S: [1,0) Exits: ((-1 * (ptrtoint i8* %arg to i64)) + (ptrtoint i8* %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {(1 + %arg),+,1}<%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint i8* %arg to i64)) + (ptrtoint i8* %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable } ; X64-NEXT: Determining loop execution counts for: @pr46786_c26_char ; X64-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint i8* %arg to i64)) + (ptrtoint i8* %arg1 to i64)) ; X64-NEXT: Loop %bb6: max backedge-taken count is -1 @@ -419,7 +419,7 @@ ; X32-NEXT: %i13 = add i8 %i12, %i8 ; X32-NEXT: --> (%i12 + %i8) U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i14 = getelementptr inbounds i8, i8* %i7, i64 1 -; X32-NEXT: --> {(1 + %arg),+,1}<%bb6> U: [1,0) S: [1,0) Exits: ((-1 * (ptrtoint i8* %arg to i32)) + (ptrtoint i8* %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(1 + %arg),+,1}<%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint i8* %arg to i32)) + (ptrtoint i8* %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable } ; X32-NEXT: Determining loop execution counts for: @pr46786_c26_char ; X32-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint i8* %arg to i32)) + (ptrtoint i8* %arg1 to i32)) ; X32-NEXT: Loop %bb6: max backedge-taken count is -1 @@ -479,7 +479,7 @@ ; X64-NEXT: %i14 = add nsw i32 %i13, %i8 ; X64-NEXT: --> (%i13 + %i8) U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i15 = getelementptr inbounds i32, i32* %i7, i64 1 -; X64-NEXT: --> {(4 + %arg),+,4}<%bb6> U: [4,0) S: [4,0) Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint i32* %arg to i64)) + (ptrtoint i32* %arg1 to i64)) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {(4 + %arg),+,4}<%bb6> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint i32* %arg to i64)) + (ptrtoint i32* %arg1 to i64)) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } ; X64-NEXT: Determining loop execution counts for: @pr46786_c26_int ; X64-NEXT: Loop %bb6: backedge-taken count is ((-4 + (-1 * (ptrtoint i32* %arg to i64)) + (ptrtoint i32* %arg1 to i64)) /u 4) ; X64-NEXT: Loop %bb6: max backedge-taken count is 4611686018427387903 @@ -508,7 +508,7 @@ ; X32-NEXT: %i14 = add nsw i32 %i13, %i8 ; X32-NEXT: --> (%i13 + %i8) U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i15 = getelementptr inbounds i32, i32* %i7, i64 1 -; X32-NEXT: --> {(4 + %arg),+,4}<%bb6> U: [4,0) S: [4,0) Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint i32* %arg to i32)) + (ptrtoint i32* %arg1 to i32)) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(4 + %arg),+,4}<%bb6> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint i32* %arg to i32)) + (ptrtoint i32* %arg1 to i32)) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } ; X32-NEXT: Determining loop execution counts for: @pr46786_c26_int ; X32-NEXT: Loop %bb6: backedge-taken count is ((-4 + (-1 * (ptrtoint i32* %arg to i32)) + (ptrtoint i32* %arg1 to i32)) /u 4) ; X32-NEXT: Loop %bb6: max backedge-taken count is 1073741823 Index: llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll =================================================================== --- llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll +++ llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll @@ -6,9 +6,6 @@ ; CHECK-LABEL: foo: ; CHECK: # %bb.0: ; CHECK-NEXT: cmpd 5, 7 -; CHECK-NEXT: std 19, -104(1) # 8-byte Folded Spill -; CHECK-NEXT: std 20, -96(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, -88(1) # 8-byte Folded Spill ; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill ; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill ; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill @@ -26,90 +23,84 @@ ; CHECK-NEXT: mulld 12, 8, 5 ; CHECK-NEXT: addi 29, 3, 16 ; CHECK-NEXT: mulld 0, 9, 8 -; CHECK-NEXT: mr 25, 12 +; CHECK-NEXT: sldi 11, 10, 3 ; CHECK-NEXT: mulld 30, 8, 30 ; CHECK-NEXT: mulld 28, 8, 28 ; CHECK-NEXT: mulld 8, 8, 27 -; CHECK-NEXT: sldi 11, 10, 3 -; CHECK-NEXT: li 27, 0 -; CHECK-NEXT: mr 26, 30 ; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: add 5, 5, 9 -; CHECK-NEXT: add 25, 25, 0 -; CHECK-NEXT: add 26, 26, 0 +; CHECK-NEXT: add 12, 12, 0 +; CHECK-NEXT: add 30, 30, 0 ; CHECK-NEXT: add 28, 28, 0 ; CHECK-NEXT: add 8, 8, 0 -; CHECK-NEXT: addi 27, 27, 1 ; CHECK-NEXT: cmpd 5, 7 ; CHECK-NEXT: bge 0, .LBB0_6 ; CHECK-NEXT: .LBB0_3: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_5 Depth 2 -; CHECK-NEXT: sub 24, 5, 10 -; CHECK-NEXT: cmpd 6, 24 +; CHECK-NEXT: sub 27, 5, 10 +; CHECK-NEXT: cmpd 6, 27 ; CHECK-NEXT: bge 0, .LBB0_2 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: maddld 19, 0, 27, 30 -; CHECK-NEXT: maddld 20, 0, 27, 12 -; CHECK-NEXT: add 22, 6, 28 -; CHECK-NEXT: add 21, 6, 8 -; CHECK-NEXT: add 20, 6, 20 -; CHECK-NEXT: add 19, 6, 19 -; CHECK-NEXT: sldi 23, 6, 3 +; CHECK-NEXT: add 23, 6, 12 +; CHECK-NEXT: add 22, 6, 30 +; CHECK-NEXT: add 25, 6, 28 +; CHECK-NEXT: add 24, 6, 8 +; CHECK-NEXT: sldi 26, 6, 3 +; CHECK-NEXT: sldi 25, 25, 3 +; CHECK-NEXT: sldi 24, 24, 3 +; CHECK-NEXT: sldi 23, 23, 3 ; CHECK-NEXT: sldi 22, 22, 3 -; CHECK-NEXT: sldi 21, 21, 3 -; CHECK-NEXT: add 23, 4, 23 -; CHECK-NEXT: add 22, 29, 22 -; CHECK-NEXT: add 21, 29, 21 -; CHECK-NEXT: sldi 20, 20, 3 -; CHECK-NEXT: sldi 19, 19, 3 -; CHECK-NEXT: add 20, 3, 20 -; CHECK-NEXT: add 19, 3, 19 +; CHECK-NEXT: add 26, 4, 26 +; CHECK-NEXT: add 25, 29, 25 +; CHECK-NEXT: add 24, 29, 24 +; CHECK-NEXT: add 23, 3, 23 +; CHECK-NEXT: add 22, 3, 22 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_5: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: lfd 0, 0(23) -; CHECK-NEXT: lfd 1, 0(20) +; CHECK-NEXT: lfd 0, 0(26) +; CHECK-NEXT: lfd 1, 0(23) ; CHECK-NEXT: add 6, 6, 10 -; CHECK-NEXT: cmpd 6, 24 +; CHECK-NEXT: cmpd 6, 27 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 8(20) +; CHECK-NEXT: lfd 1, 8(23) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 16(20) +; CHECK-NEXT: lfd 1, 16(23) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 24(20) -; CHECK-NEXT: add 20, 20, 11 +; CHECK-NEXT: lfd 1, 24(23) +; CHECK-NEXT: add 23, 23, 11 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 0(19) +; CHECK-NEXT: lfd 1, 0(22) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 8(19) +; CHECK-NEXT: lfd 1, 8(22) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 16(19) +; CHECK-NEXT: lfd 1, 16(22) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 24(19) -; CHECK-NEXT: add 19, 19, 11 +; CHECK-NEXT: lfd 1, 24(22) +; CHECK-NEXT: add 22, 22, 11 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, -16(21) +; CHECK-NEXT: lfd 1, -16(24) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, -8(21) +; CHECK-NEXT: lfd 1, -8(24) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 0(21) +; CHECK-NEXT: lfd 1, 0(24) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 8(21) -; CHECK-NEXT: add 21, 21, 11 +; CHECK-NEXT: lfd 1, 8(24) +; CHECK-NEXT: add 24, 24, 11 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, -16(22) +; CHECK-NEXT: lfd 1, -16(25) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, -8(22) +; CHECK-NEXT: lfd 1, -8(25) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 0(22) +; CHECK-NEXT: lfd 1, 0(25) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 8(22) -; CHECK-NEXT: add 22, 22, 11 +; CHECK-NEXT: lfd 1, 8(25) +; CHECK-NEXT: add 25, 25, 11 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: stfd 0, 0(23) -; CHECK-NEXT: add 23, 23, 11 +; CHECK-NEXT: stfd 0, 0(26) +; CHECK-NEXT: add 26, 26, 11 ; CHECK-NEXT: blt 0, .LBB0_5 ; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_6: @@ -122,9 +113,6 @@ ; CHECK-NEXT: ld 24, -64(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 23, -72(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 22, -80(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 21, -88(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 20, -96(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 19, -104(1) # 8-byte Folded Reload ; CHECK-NEXT: blr %9 = icmp slt i64 %2, %4 br i1 %9, label %10, label %97 Index: llvm/test/Transforms/LoopIdiom/basic.ll =================================================================== --- llvm/test/Transforms/LoopIdiom/basic.ll +++ llvm/test/Transforms/LoopIdiom/basic.ll @@ -809,28 +809,26 @@ ; CHECK-NEXT: [[TOBOOL_9:%.*]] = icmp eq i32 [[C]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_9]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK: while.body.preheader: -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[C]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], -4 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw i32 [[C]], -1 -; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP3]], [[TMP6]] -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[CALL]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP1]], -1 -; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[TMP5]] -; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[C]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = shl nsw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[CALL]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[SCEVGEP12:%.*]] = bitcast i32* [[SCEVGEP1]] to i8* -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[C]] to i64 -; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[SCEVGEP]], i8* align 4 [[SCEVGEP12]], i64 [[TMP11]], i1 false) +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[C]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[SCEVGEP]], i8* align 4 [[SCEVGEP12]], i64 [[TMP9]], i1 false) ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[DEC10_IN:%.*]] = phi i32 [ [[DEC10:%.*]], [[WHILE_BODY]] ], [ [[C]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: [[DEC10]] = add nsw i32 [[DEC10_IN]], -1 ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[DEC10]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 [[IDXPROM]] ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[DEC10]], 0 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]