diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -2781,7 +2781,8 @@ // If we found some loop invariants, fold them into the recurrence. if (!LIOps.empty()) { // Compute nowrap flags for the addition of the loop-invariant ops and - // the addrec. Temporarily push it as an operand for that purpose. + // the addrec. Temporarily push it as an operand for that purpose. These + // flags are valid in the scope of the addrec only. LIOps.push_back(AddRec); SCEV::NoWrapFlags Flags = ComputeFlags(LIOps); LIOps.pop_back(); @@ -2790,10 +2791,26 @@ LIOps.push_back(AddRec->getStart()); SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands()); - // This follows from the fact that the no-wrap flags on the outer add - // expression are applicable on the 0th iteration, when the add recurrence - // will be equal to its start value. - AddRecOps[0] = getAddExpr(LIOps, Flags, Depth + 1); + + // It is not in general safe to propagate flags valid on an add within + // the addrec scope to one outside it. We must prove that the inner + // scope is guaranteed to execute if the outer one does to be able to + // safely propagate. We know the program is undefined if poison is + // produced on the inner scoped addrec. We also know that *for this use* + // the outer scoped add can't overflow (because of the flags we just + // computed for the inner scoped add) without the program being undefined. + // Proving that entry to the outer scope necessitates entry to the inner + // scope, thus proves the program undefined if the flags would be violated + // in the outer scope. + const bool CanPropagateFlags = llvm::any_of(LIOps, [&](const SCEV *S) { + auto *ReachI = &*AddRecLoop->getHeader()->begin(); + if (auto *DefI = getDefinedScopeRoot(S)) + if (isGuaranteedToTransferExecutionTo(DefI, ReachI)) + return true; + return false; + }); + auto AddFlags = CanPropagateFlags ? 
Flags : SCEV::FlagAnyWrap; + AddRecOps[0] = getAddExpr(LIOps, AddFlags, Depth + 1); // Build the new addrec. Propagate the NUW and NSW flags if both the // outer add and the inner addrec are guaranteed to have no overflow. @@ -6572,7 +6589,13 @@ const Instruction *ScalarEvolution::getDefinedScopeRoot(const SCEV *S) { if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(S)) return &*AddRec->getLoop()->getHeader()->begin(); - // TODO: add SCEVConstant and SCEVUnknown caxes here + if (isa<SCEVConstant>(S)) + return &*F.getEntryBlock().begin(); + if (auto *U = dyn_cast<SCEVUnknown>(S)) { + if (auto *I = dyn_cast<Instruction>(U->getValue())) + return I; + return &*F.getEntryBlock().begin(); + } return nullptr; } @@ -6590,6 +6613,15 @@ ::isGuaranteedToTransferExecutionToSuccessor(A->getIterator(), B->getIterator())) return true; + + auto *BLoop = LI.getLoopFor(B->getParent()); + if (BLoop && BLoop->getHeader() == B->getParent() && + BLoop->getLoopPreheader() == A->getParent() && + ::isGuaranteedToTransferExecutionToSuccessor(A->getIterator(), + A->getParent()->end()) && + ::isGuaranteedToTransferExecutionToSuccessor(B->getParent()->begin(), + B->getIterator())) + return true; return false; } @@ -6625,8 +6657,14 @@ // TODO: We can do better here in some cases. if (!isSCEVable(Op->getType())) return false; - if (auto *DefI = getDefinedScopeRoot(getSCEV(Op))) - if (isGuaranteedToTransferExecutionTo(DefI, I)) + // TODO: the following two lines should be: + // if (auto *DefI = getDefinedScopeRoot(getSCEV(Op))) + // if (isGuaranteedToTransferExecutionTo(DefI, I)) + // We use the following instead for the purposes of separating a bugfix + // change from an optimization change. Once pr51817 is fully addressed, + // we should unlock this power. 
+ if (auto *AddRecS = dyn_cast<SCEVAddRecExpr>(getSCEV(Op))) + if (isGuaranteedToExecuteForEveryIteration(I, AddRecS->getLoop())) return true; } return false; diff --git a/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll b/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll --- a/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll +++ b/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll @@ -1328,7 +1328,7 @@ ; CHECK-NEXT: %index32 = sub nsw i32 %i, %sub ; CHECK-NEXT: --> {((-1 * %sub) + %start),+,1}<%loop> U: full-set S: full-set Exits: (-1 + (-1 * %sub) + %numIterations) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index64 = sext i32 %index32 to i64 -; CHECK-NEXT: --> {((sext i32 %start to i64) + (-1 * (sext i32 %sub to i64))),+,1}<%loop> U: [-4294967295,8589934591) S: [-4294967295,8589934591) Exits: ((zext i32 (-1 + (-1 * %start) + %numIterations) to i64) + (sext i32 %start to i64) + (-1 * (sext i32 %sub to i64))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {((sext i32 %start to i64) + (-1 * (sext i32 %sub to i64))),+,1}<%loop> U: [-4294967295,8589934591) S: [-4294967295,8589934591) Exits: ((zext i32 (-1 + (-1 * %start) + %numIterations) to i64) + (sext i32 %start to i64) + (-1 * (sext i32 %sub to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %ptr = getelementptr inbounds float, float* %input, i64 %index64 ; CHECK-NEXT: --> {((4 * (sext i32 %start to i64)) + (-4 * (sext i32 %sub to i64)) + %input),+,4}<%loop> U: full-set S: full-set Exits: ((4 * (zext i32 (-1 + (-1 * %start) + %numIterations) to i64)) + (4 * (sext i32 %start to i64)) + (-4 * (sext i32 %sub to i64)) + %input) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %nexti = add nsw i32 %i, 1 @@ -1369,7 +1369,7 @@ ; CHECK-NEXT: %index32 = sub nsw i32 %i, %halfsub ; CHECK-NEXT: --> {((-1 * %halfsub) + %start),+,1}<%loop> U: full-set S: full-set Exits: (-1 + (-1 * %halfsub) + %numIterations) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index64 = sext 
i32 %index32 to i64 -; CHECK-NEXT: --> {((sext i32 %start to i64) + (-1 * (sext i32 %halfsub to i64))),+,1}<%loop> U: [-3221225471,7516192767) S: [-3221225471,7516192767) Exits: ((zext i32 (-1 + (-1 * %start) + %numIterations) to i64) + (sext i32 %start to i64) + (-1 * (sext i32 %halfsub to i64))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {((sext i32 %start to i64) + (-1 * (sext i32 %halfsub to i64))),+,1}<%loop> U: [-3221225471,7516192767) S: [-3221225471,7516192767) Exits: ((zext i32 (-1 + (-1 * %start) + %numIterations) to i64) + (sext i32 %start to i64) + (-1 * (sext i32 %halfsub to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %ptr = getelementptr inbounds float, float* %input, i64 %index64 ; CHECK-NEXT: --> {((4 * (sext i32 %start to i64)) + (-4 * (sext i32 %halfsub to i64)) + %input),+,4}<%loop> U: full-set S: full-set Exits: ((4 * (zext i32 (-1 + (-1 * %start) + %numIterations) to i64)) + (4 * (sext i32 %start to i64)) + (-4 * (sext i32 %halfsub to i64)) + %input) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %nexti = add nsw i32 %i, 1 diff --git a/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll b/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll --- a/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll +++ b/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll @@ -21,7 +21,7 @@ ; CHECK-NEXT: %idxprom20 = zext i32 %storemerge1921 to i64 ; CHECK-NEXT: --> {3,+,4294967295}<%for.cond6> U: [3,4) S: [3,4) Exits: <> LoopDispositions: { %for.cond6: Computable, %outer.loop: Variant } ; CHECK-NEXT: %arrayidx7 = getelementptr inbounds [1 x [4 x i16]], [1 x [4 x i16]]* @__const.f.g, i64 0, i64 0, i64 %idxprom20 -; CHECK-NEXT: --> {(6 + @__const.f.g),+,8589934590}<%for.cond6> U: [6,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %for.cond6: Computable, %outer.loop: Variant } +; CHECK-NEXT: --> {(6 + @__const.f.g),+,8589934590}<%for.cond6> U: [0,-1) S: 
[-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %for.cond6: Computable, %outer.loop: Variant } ; CHECK-NEXT: %i = load i16, i16* %arrayidx7, align 2 ; CHECK-NEXT: --> %i U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond6: Variant, %outer.loop: Variant } ; CHECK-NEXT: %storemerge1822.lcssa.ph = phi i32 [ 0, %for.cond6 ] @@ -45,7 +45,7 @@ ; CHECK-NEXT: %idxprom20.3 = zext i32 %storemerge1921.3 to i64 ; CHECK-NEXT: --> {3,+,4294967295}<%inner.loop> U: [3,4) S: [3,4) Exits: <> LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant } ; CHECK-NEXT: %arrayidx7.3 = getelementptr inbounds [1 x [4 x i16]], [1 x [4 x i16]]* @__const.f.g, i64 0, i64 0, i64 %idxprom20.3 -; CHECK-NEXT: --> {(6 + @__const.f.g),+,8589934590}<%inner.loop> U: [6,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant } +; CHECK-NEXT: --> {(6 + @__const.f.g),+,8589934590}<%inner.loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant } ; CHECK-NEXT: %i7 = load i16, i16* %arrayidx7.3, align 2 ; CHECK-NEXT: --> %i7 U: full-set S: full-set Exits: <> LoopDispositions: { %inner.loop: Variant, %outer.loop: Variant } ; CHECK-NEXT: %i8 = load volatile i32, i32* @b, align 4 diff --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll --- a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll +++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll @@ -297,11 +297,11 @@ ; CHECK-NEXT: %iv = phi i32 [ %a, %entry ], [ %iv.next, %loop ] ; CHECK-NEXT: --> {%a,+,%b}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i32 %iv, %b -; CHECK-NEXT: --> {(%a + %b),+,%b}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(%a + 
%b),+,%b}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %trap = udiv i32 %a, %iv.next -; CHECK-NEXT: --> (%a /u {(%a + %b),+,%b}<%loop>) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> (%a /u {(%a + %b),+,%b}<%loop>) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %c = add i32 %a, %b -; CHECK-NEXT: --> (%a + %b) U: full-set S: full-set +; CHECK-NEXT: --> (%a + %b) U: full-set S: full-set ; CHECK-NEXT: Determining loop execution counts for: @test2_a ; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable max backedge-taken count. @@ -335,9 +335,9 @@ ; CHECK-NEXT: %iv = phi i32 [ %a, %entry ], [ %iv.next, %loop ] ; CHECK-NEXT: --> {%a,+,%b}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i32 %iv, %b -; CHECK-NEXT: --> {(%a + %b),+,%b}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(%a + %b),+,%b}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %trap = udiv i32 %a, %iv.next -; CHECK-NEXT: --> (%a /u {(%a + %b),+,%b}<%loop>) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> (%a /u {(%a + %b),+,%b}<%loop>) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test2_b ; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable max backedge-taken count. 
diff --git a/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll b/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll --- a/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll +++ b/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll @@ -30,13 +30,13 @@ ; CHECK-NEXT: %8 = sext i32 %7 to i64 ; CHECK-NEXT: --> {1,+,2}<%bb> U: [1,2147483646) S: [1,2147483646) Exits: (1 + (2 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2))) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %9 = getelementptr inbounds double, double* %q, i64 %8 -; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: [8,0) S: [8,0) Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } +; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: full-set S: full-set Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %t7 = add nsw i32 %i.01, 1 ; CHECK-NEXT: --> {1,+,2}<%bb> U: [1,2147483646) S: [1,2147483646) Exits: (1 + (2 * ((-1 + (2 * (%no /u 2))) /u 2))) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %t8 = sext i32 %t7 to i64 ; CHECK-NEXT: --> {1,+,2}<%bb> U: [1,2147483646) S: [1,2147483646) Exits: (1 + (2 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2))) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %t9 = getelementptr inbounds double, double* %q, i64 %t8 -; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: [8,0) S: [8,0) Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } +; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: full-set S: full-set Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %14 = sext i32 %i.01 to i64 ; CHECK-NEXT: --> {0,+,2}<%bb> U: [0,2147483645) S: [0,2147483645) Exits: (2 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %15 = 
getelementptr inbounds double, double* %d, i64 %14 diff --git a/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll b/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll --- a/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll +++ b/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll @@ -27,13 +27,13 @@ ; CHECK-NEXT: %8 = sext i32 %7 to i64 ; CHECK-NEXT: --> {1,+,2}<%bb> U: [1,2147483646) S: [1,2147483646) Exits: (1 + (2 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2))) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %9 = getelementptr inbounds double, double* %q, i64 %8 -; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: [8,0) S: [8,0) Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } +; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: full-set S: full-set Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %t7 = add nsw i32 %i.01, 1 ; CHECK-NEXT: --> {1,+,2}<%bb> U: [1,2147483646) S: [1,2147483646) Exits: (1 + (2 * ((-1 + (2 * (%no /u 2))) /u 2))) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %t8 = sext i32 %t7 to i64 ; CHECK-NEXT: --> {1,+,2}<%bb> U: [1,2147483646) S: [1,2147483646) Exits: (1 + (2 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2))) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %t9 = getelementptr inbounds double, double* %q, i64 %t8 -; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: [8,0) S: [8,0) Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } +; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: full-set S: full-set Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %14 = sext i32 %i.01 to i64 ; CHECK-NEXT: --> {0,+,2}<%bb> U: [0,2147483645) S: [0,2147483645) Exits: (2 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) LoopDispositions: { %bb: Computable } ; 
CHECK-NEXT: %15 = getelementptr inbounds double, double* %d, i64 %14 diff --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll --- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll +++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll @@ -6,9 +6,6 @@ ; CHECK-LABEL: foo: ; CHECK: # %bb.0: ; CHECK-NEXT: cmpd 5, 7 -; CHECK-NEXT: std 19, -104(1) # 8-byte Folded Spill -; CHECK-NEXT: std 20, -96(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, -88(1) # 8-byte Folded Spill ; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill ; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill ; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill @@ -26,90 +23,84 @@ ; CHECK-NEXT: mulld 12, 8, 5 ; CHECK-NEXT: addi 29, 3, 16 ; CHECK-NEXT: mulld 0, 9, 8 -; CHECK-NEXT: mr 25, 12 +; CHECK-NEXT: sldi 11, 10, 3 ; CHECK-NEXT: mulld 30, 8, 30 ; CHECK-NEXT: mulld 28, 8, 28 ; CHECK-NEXT: mulld 8, 8, 27 -; CHECK-NEXT: sldi 11, 10, 3 -; CHECK-NEXT: li 27, 0 -; CHECK-NEXT: mr 26, 30 ; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: add 5, 5, 9 -; CHECK-NEXT: add 25, 25, 0 -; CHECK-NEXT: add 26, 26, 0 +; CHECK-NEXT: add 12, 12, 0 +; CHECK-NEXT: add 30, 30, 0 ; CHECK-NEXT: add 28, 28, 0 ; CHECK-NEXT: add 8, 8, 0 -; CHECK-NEXT: addi 27, 27, 1 ; CHECK-NEXT: cmpd 5, 7 ; CHECK-NEXT: bge 0, .LBB0_6 ; CHECK-NEXT: .LBB0_3: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_5 Depth 2 -; CHECK-NEXT: sub 24, 5, 10 -; CHECK-NEXT: cmpd 6, 24 +; CHECK-NEXT: sub 27, 5, 10 +; CHECK-NEXT: cmpd 6, 27 ; CHECK-NEXT: bge 0, .LBB0_2 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: maddld 21, 0, 27, 30 -; CHECK-NEXT: maddld 20, 0, 27, 12 -; CHECK-NEXT: add 23, 6, 28 -; CHECK-NEXT: add 22, 6, 8 -; CHECK-NEXT: add 20, 6, 20 -; CHECK-NEXT: add 19, 6, 21 +; CHECK-NEXT: add 23, 6, 12 +; CHECK-NEXT: add 22, 6, 30 +; CHECK-NEXT: add 26, 6, 28 +; CHECK-NEXT: add 25, 6, 8 +; CHECK-NEXT: sldi 24, 6, 3 +; CHECK-NEXT: sldi 26, 26, 3 +; CHECK-NEXT: 
sldi 25, 25, 3 ; CHECK-NEXT: sldi 23, 23, 3 ; CHECK-NEXT: sldi 22, 22, 3 -; CHECK-NEXT: sldi 21, 6, 3 -; CHECK-NEXT: add 23, 29, 23 -; CHECK-NEXT: add 22, 29, 22 -; CHECK-NEXT: sldi 20, 20, 3 -; CHECK-NEXT: sldi 19, 19, 3 -; CHECK-NEXT: add 21, 4, 21 -; CHECK-NEXT: add 20, 3, 20 -; CHECK-NEXT: add 19, 3, 19 +; CHECK-NEXT: add 24, 4, 24 +; CHECK-NEXT: add 26, 29, 26 +; CHECK-NEXT: add 25, 29, 25 +; CHECK-NEXT: add 23, 3, 23 +; CHECK-NEXT: add 22, 3, 22 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_5: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: lfd 0, 0(21) -; CHECK-NEXT: lfd 1, 0(20) +; CHECK-NEXT: lfd 0, 0(24) +; CHECK-NEXT: lfd 1, 0(23) ; CHECK-NEXT: add 6, 6, 10 -; CHECK-NEXT: cmpd 6, 24 +; CHECK-NEXT: cmpd 6, 27 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 8(20) +; CHECK-NEXT: lfd 1, 8(23) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 16(20) +; CHECK-NEXT: lfd 1, 16(23) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 24(20) -; CHECK-NEXT: add 20, 20, 11 +; CHECK-NEXT: lfd 1, 24(23) +; CHECK-NEXT: add 23, 23, 11 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 0(19) +; CHECK-NEXT: lfd 1, 0(22) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 8(19) +; CHECK-NEXT: lfd 1, 8(22) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 16(19) +; CHECK-NEXT: lfd 1, 16(22) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 24(19) -; CHECK-NEXT: add 19, 19, 11 +; CHECK-NEXT: lfd 1, 24(22) +; CHECK-NEXT: add 22, 22, 11 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, -16(22) +; CHECK-NEXT: lfd 1, -16(25) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, -8(22) +; CHECK-NEXT: lfd 1, -8(25) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 0(22) +; CHECK-NEXT: lfd 1, 0(25) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 8(22) -; CHECK-NEXT: add 22, 22, 11 +; CHECK-NEXT: lfd 1, 8(25) +; CHECK-NEXT: add 25, 25, 11 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, -16(23) +; CHECK-NEXT: lfd 1, 
-16(26) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, -8(23) +; CHECK-NEXT: lfd 1, -8(26) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 0(23) +; CHECK-NEXT: lfd 1, 0(26) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 8(23) -; CHECK-NEXT: add 23, 23, 11 +; CHECK-NEXT: lfd 1, 8(26) +; CHECK-NEXT: add 26, 26, 11 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: stfd 0, 0(21) -; CHECK-NEXT: add 21, 21, 11 +; CHECK-NEXT: stfd 0, 0(24) +; CHECK-NEXT: add 24, 24, 11 ; CHECK-NEXT: blt 0, .LBB0_5 ; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_6: @@ -122,9 +113,6 @@ ; CHECK-NEXT: ld 24, -64(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 23, -72(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 22, -80(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 21, -88(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 20, -96(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 19, -104(1) # 8-byte Folded Reload ; CHECK-NEXT: blr %9 = icmp slt i64 %2, %4 br i1 %9, label %10, label %97 diff --git a/llvm/test/Transforms/LoopIdiom/basic.ll b/llvm/test/Transforms/LoopIdiom/basic.ll --- a/llvm/test/Transforms/LoopIdiom/basic.ll +++ b/llvm/test/Transforms/LoopIdiom/basic.ll @@ -809,28 +809,26 @@ ; CHECK-NEXT: [[TOBOOL_9:%.*]] = icmp eq i32 [[C]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_9]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK: while.body.preheader: -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[C]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], -4 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw i32 [[C]], -1 -; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP3]], [[TMP6]] -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[CALL]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP1]], -1 -; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[TMP5]] -; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP9]] +; 
CHECK-NEXT: [[TMP1:%.*]] = add i32 [[C]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = shl nsw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[CALL]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[SCEVGEP12:%.*]] = bitcast i32* [[SCEVGEP1]] to i8* -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[C]] to i64 -; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[SCEVGEP]], i8* align 4 [[SCEVGEP12]], i64 [[TMP11]], i1 false) +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[C]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[SCEVGEP]], i8* align 4 [[SCEVGEP12]], i64 [[TMP9]], i1 false) ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[DEC10_IN:%.*]] = phi i32 [ [[DEC10:%.*]], [[WHILE_BODY]] ], [ [[C]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: [[DEC10]] = add nsw i32 [[DEC10_IN]], -1 ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[DEC10]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 [[IDXPROM]] ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[DEC10]], 0 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]