Index: llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -291,15 +291,10 @@
   Function *F = Header->getParent();
   LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
   LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
-  Loop *NewLoop = nullptr;
   Loop *ParentLoop = L->getParentLoop();
-  if (CreateRemainderLoop) {
-    NewLoop = new Loop();
-    if (ParentLoop)
-      ParentLoop->addChildLoop(NewLoop);
-    else
-      LI->addTopLevelLoop(NewLoop);
-  }
+
+  // The map from original loops to their cloned copies.
+  SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
 
   // For each block in the original loop, create a new copy,
   // and update the value map with the newly created values.
@@ -307,10 +302,64 @@
     BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
     NewBlocks.push_back(NewBB);
 
+    // Figure out which loop NewBB is in. findClonedLoop maps OldLoop (L or one
+    // of its inner loops) to its counterpart in the cloned code, creating the
+    // clone and registering it in LI the first time OldLoop is encountered.
+    auto findClonedLoop = [&](const Loop *OldLoop) {
+      Loop *&NewLoop = NewLoops[OldLoop];
+      // If we've encountered this loop before, return it right away.
+      if (NewLoop)
+        return NewLoop;
+
+      // If BB is from L, and we're not creating a remainder, the loop for
+      // NewBB will be ParentLoop, which might be null. Update NewLoops map and
+      // return ParentLoop.
+      if (OldLoop == L && !CreateRemainderLoop)
+        return (NewLoop = ParentLoop);
+
+      // Now we know that there should be a cloned counterpart for OldLoop, but
+      // we haven't seen it yet. Note that OldLoop might be L if we're
+      // generating a remainder loop, or it can be an inner loop of L - in this
+      // case we'll recreate the loop structure of L in its clone.
+
+      // This is the first block belonging to OldLoop encountered in our RPO
+      // traversal.
+      assert(*BB == OldLoop->getHeader() && "Header should be first in RPO");
+
+      NewLoop = new Loop;
+      Loop *OldLoopParent = OldLoop->getParentLoop();
+      // Pick the parent of NewLoop in the cloned code:
+      //   1. OldLoop is L itself: its parent (ParentLoop) is not cloned, so
+      //      NewLoop shares it.
+      //   2. OldLoop is an inner loop: its parent is L or another inner loop,
+      //      which precedes it in RPO and therefore is already in our map.
+      //
+      // The result is null when OldLoop has no parent, and also when the
+      // parent is L, L is top-level and no remainder loop is created (L's
+      // blocks then belong to no loop). In both cases NewLoop becomes yet
+      // another top-level loop.
+      Loop *NewLoopParent = nullptr;
+      if (OldLoopParent)
+        NewLoopParent = ParentLoop == OldLoopParent
+                            ? ParentLoop
+                            : NewLoops.lookup(OldLoopParent);
+      assert((NewLoopParent || !OldLoopParent ||
+              (OldLoopParent == L && !CreateRemainderLoop && !ParentLoop)) &&
+             "Expected parent loop before sub-loop in RPO");
+      if (NewLoopParent)
+        NewLoopParent->addChildLoop(NewLoop);
+      else
+        LI->addTopLevelLoop(NewLoop);
+      return NewLoop;
+    };
+
+    Loop *NewLoop = findClonedLoop(LI->getLoopFor(*BB));
+
+    assert((NewLoop || (!CreateRemainderLoop && !ParentLoop)) &&
+           "NewLoop can only be null if we are cloning top-level loop "
+           "without creating a remainder loop.");
     if (NewLoop)
       NewLoop->addBasicBlockToLoop(NewBB, *LI);
-    else if (ParentLoop)
-      ParentLoop->addBasicBlockToLoop(NewBB, *LI);
 
     VMap[*BB] = NewBB;
     if (Header == *BB) {
@@ -369,7 +418,9 @@
         NewPHI->setIncomingValue(idx, V);
     }
   }
-  if (NewLoop) {
+  if (CreateRemainderLoop) {
+    Loop *NewLoop = NewLoops[L];
+    assert(NewLoop && "L should have been cloned");
     // Add unroll disable metadata to disable future unrolling for this loop.
     SmallVector<Metadata *, 4> MDs;
     // Reserve first location for self reference to the LoopID metadata node.
Index: llvm/trunk/test/Transforms/LoopUnroll/pr28888.ll
===================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/pr28888.ll
+++ llvm/trunk/test/Transforms/LoopUnroll/pr28888.ll
@@ -0,0 +1,44 @@
+; RUN: opt -loop-unroll -verify-loop-info -unroll-runtime-epilog=false -unroll-count=4 -S < %s | FileCheck %s -check-prefix=PROLOG
+; RUN: opt -loop-unroll -verify-loop-info -unroll-runtime-epilog=true -unroll-count=4 -S < %s | FileCheck %s -check-prefix=EPILOG
+
+; PR28888
+; Check that loop info stays valid (-verify-loop-info) when an outer loop is
+; unrolled with a prolog or epilog remainder loop that has a child loop.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; PROLOG-LABEL: @foo
+; EPILOG-LABEL: @foo
+define void @foo(i1 %x) #0 {
+bb:
+  br label %bb1
+
+bb1:
+  br label %bb2
+
+; PROLOG: bb2.prol:
+; EPILOG: bb2.epil:
+bb2:
+  %tmp = phi i64 [ 0, %bb1 ], [ %tmp2, %bb5 ]
+  br label %bb3
+
+bb3:
+  br label %bb4
+
+bb4:
+  br i1 %x, label %bb3, label %bb5
+
+; PROLOG: bb5.3:
+; EPILOG: bb5.3:
+bb5:
+  %tmp2 = add nuw nsw i64 %tmp, 1
+  %tmp3 = trunc i64 %tmp2 to i32
+  %tmp4 = icmp eq i32 %tmp3, undef
+  br i1 %tmp4, label %bb6, label %bb2
+
+bb6:
+  br label %bb1
+}
+
+attributes #0 = { "target-cpu"="x86-64" }