Index: lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- lib/Transforms/Utils/LoopUnroll.cpp +++ lib/Transforms/Utils/LoopUnroll.cpp @@ -97,15 +97,9 @@ /// Folds a basic block into its predecessor if it only has one predecessor, and /// that predecessor only has one successor. -/// The LoopInfo Analysis that is passed will be kept consistent. If folding is -/// successful references to the containing loop must be removed from -/// ScalarEvolution by calling ScalarEvolution::forgetLoop because SE may have -/// references to the eliminated BB. The argument ForgottenLoops contains a set -/// of loops that have already been forgotten to prevent redundant, expensive -/// calls to ScalarEvolution::forgetLoop. Returns the new combined block. +/// The LoopInfo Analysis that is passed will be kept consistent. static BasicBlock * foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE, - SmallPtrSetImpl &ForgottenLoops, DominatorTree *DT) { // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and @@ -149,13 +143,6 @@ DT->eraseNode(BB); } - // ScalarEvolution holds references to loop exit blocks. - if (SE) { - if (Loop *L = LI->getLoopFor(BB)) { - if (ForgottenLoops.insert(L).second) - SE->forgetLoop(L); - } - } LI->removeBlock(BB); // Inherit predecessor's name if it exists... @@ -449,11 +436,6 @@ } } - // Notify ScalarEvolution that the loop will be substantially changed, - // if not outright eliminated. - if (SE) - SE->forgetLoop(L); - // If we know the trip count, we know the multiple... unsigned BreakoutTrip = 0; if (TripCount != 0) { @@ -520,6 +502,21 @@ DEBUG(dbgs() << "!\n"); } + // We are going to make changes to this loop. SCEV may be keeping cached info + // about it, in patricular about backedge taken count. The changes we make + // are guaranteed to invalidate this information for our loop. It is tempting + // to only invalidate the loop being unrolled, but it is incorrect as long as + // all exiting branches from all inner loops have impact on the outer loops, + // and if something changes inside them then any of outer loops may also + // change. When we forget outermost loop, we also forget all contained loops + // and this is what we need here. + if (SE) { + const Loop *CurrL = L; + while (const Loop *ParentL = CurrL->getParentLoop()) + CurrL = ParentL; + SE->forgetLoop(CurrL); + } + bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); @@ -577,14 +574,9 @@ "Header should not be in a sub-loop"); // Tell LI about New. const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops); - if (OldLoop) { + if (OldLoop) LoopsToSimplify.insert(NewLoops[OldLoop]); - // Forget the old loop, since its inputs may have changed. - if (SE) - SE->forgetLoop(OldLoop); - } - if (*BB == Header) // Loop over all of the PHI nodes in the block, changing them to use // the incoming values from the previous block. @@ -773,13 +765,12 @@ DT->verify(DominatorTree::VerificationLevel::Fast)); // Merge adjacent basic blocks, if possible. - SmallPtrSet ForgottenLoops; for (BasicBlock *Latch : Latches) { BranchInst *Term = cast(Latch->getTerminator()); if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); if (BasicBlock *Fold = - foldBlockIntoPredecessor(Dest, LI, SE, ForgottenLoops, DT)) { + foldBlockIntoPredecessor(Dest, LI, SE, DT)) { // Dest has been folded into Fold. Update our worklists accordingly. std::replace(Latches.begin(), Latches.end(), Dest, Fold); UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(), Index: test/Transforms/LoopUnroll/invalidate_right_loop.ll =================================================================== --- /dev/null +++ test/Transforms/LoopUnroll/invalidate_right_loop.ll @@ -0,0 +1,51 @@ +; RUN: opt < %s -S -indvars -loop-unroll -verify-dom-info | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" +target triple = "x86_64-unknown-linux-gnu" + +; Make sure that this test doesn't crash because of dangling pointer in SCEV. +declare void @llvm.experimental.guard(i1, ...) + +define void @test(i32* %p, i8** %p2, i64* %dest) { + +; CHECK-LABEL: @test( + +entry: + br label %outer.loop + +outer.loop: ; preds = %outer.latch, %entry + %local_2_ = phi i32 [ 10, %entry ], [ %tmp2, %outer.latch ] + %tmp1 = icmp eq i32 %local_2_, 0 + br label %inner.loop + +outer.latch: ; preds = %inner.latch + %tmp2 = add i32 %local_2_, 1 + br label %outer.loop + +inner.loop: ; preds = %inner.latch, %outer.loop + %local_4_20 = phi i32 [ 7, %outer.loop ], [ %tmp15, %inner.latch ] + %tmp6 = icmp eq i32 %local_4_20, 0 + call void (i1, ...) @llvm.experimental.guard(i1 %tmp6) [ "deopt"() ] + br label %innermost.loop + +store.block: ; preds = %innermost.loop + store i64 %tmp20, i64* %dest, align 8 + br i1 %tmp1, label %exit, label %inner.latch + +inner.latch: ; preds = %store.block + %tmp15 = add i32 %local_4_20, 4 + %tmp16 = icmp sgt i32 %tmp15, 263 + br i1 %tmp16, label %outer.latch, label %inner.loop + +innermost.loop: ; preds = %innermost.loop, %inner.loop + %tmp17 = phi i64 [ 0, %inner.loop ], [ %tmp20, %innermost.loop ] + %local_6_51 = phi i32 [ 1, %inner.loop ], [ %tmp21, %innermost.loop ] + %ze = zext i32 %local_6_51 to i64 + %tmp20 = add i64 %tmp17, %ze + %tmp21 = add nuw nsw i32 %local_6_51, 1 + %tmp22 = icmp ugt i32 %local_6_51, 5 + br i1 %tmp22, label %store.block, label %innermost.loop + +exit: ; preds = %store.block + ret void +}