Index: lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- lib/Transforms/Utils/LoopUnroll.cpp +++ lib/Transforms/Utils/LoopUnroll.cpp @@ -218,10 +218,16 @@ bool CompletelyUnroll = Count == TripCount; SmallVector ExitBlocks; L->getExitBlocks(ExitBlocks); - Loop *ParentL = L->getParentLoop(); - bool AllExitsAreInsideParentLoop = !ParentL || - std::all_of(ExitBlocks.begin(), ExitBlocks.end(), - [&](BasicBlock *BB) { return ParentL->contains(BB); }); + + // Go through all exits of L and see if there are any phi-nodes there. We just + // conservatively assume that they're inserted to preserve LCSSA form, which + // means that complete unrolling might break this form. We need to either fix + // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For + // now we just recompute LCSSA for the outer loop, but it should be possible + // to fix it in-place. + bool NeedToFixLCSSA = CompletelyUnroll && + std::any_of(ExitBlocks.begin(), ExitBlocks.end(), + [&](BasicBlock *BB) { return isa(BB->begin()); }); // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime @@ -308,6 +314,7 @@ LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); + std::vector UnrolledLoopBlocks = L->getBlocks(); for (unsigned It = 1; It != Count; ++It) { std::vector NewBlocks; SmallDenseMap NewLoops; @@ -387,6 +394,7 @@ Latches.push_back(New); NewBlocks.push_back(New); + UnrolledLoopBlocks.push_back(New); } // Remap all instructions in the most recent iteration @@ -476,8 +484,12 @@ if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, SE, - ForgottenLoops)) + ForgottenLoops)) { std::replace(Latches.begin(), Latches.end(), Dest, Fold); + UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(), + UnrolledLoopBlocks.end(), Dest), + UnrolledLoopBlocks.end()); + } } } @@ -530,6 +542,29 @@ if (CompletelyUnroll) LI->markAsRemoved(L); + // After complete unrolling most of the blocks should be contained in OuterL. + // However, some of them might happen to be out of OuterL (e.g. if they + // precede a loop exit). In this case we might need to insert PHI nodes in + // order to preserve LCSSA form. + // We don't need to check this if we already know that we need to fix LCSSA + // form. + // TODO: For now we just recompute LCSSA for the outer loop in this case, but + // it should be possible to fix it in-place. + if (OuterL && CompletelyUnroll && !NeedToFixLCSSA) { + for (unsigned i = 0; i < UnrolledLoopBlocks.size(); ++i) { + BasicBlock *BB = UnrolledLoopBlocks[i]; + if (LI->getLoopFor(BB) == OuterL) + continue; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + for (unsigned OpI = 0, OpE = I->getNumOperands(); OpI < OpE; ++OpI) { + if (auto Def = dyn_cast(I->getOperand(OpI))) + if (LI->getLoopFor(Def->getParent()) == OuterL) + NeedToFixLCSSA = true; + } + } + } + } + // If we have a pass and a DominatorTree we should re-simplify impacted loops // to ensure subsequent analyses can rely on this form. We want to simplify // at least one layer outside of the loop that was unrolled so that any @@ -538,7 +573,7 @@ if (!OuterL && !CompletelyUnroll) OuterL = L; if (OuterL) { - bool Simplified = simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA); + simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA); // LCSSA must be performed on the outermost affected loop. The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after @@ -548,7 +583,7 @@ while (OuterL->getParentLoop() != LatchLoop) OuterL = OuterL->getParentLoop(); - if (CompletelyUnroll && (!AllExitsAreInsideParentLoop || Simplified)) + if (CompletelyUnroll && NeedToFixLCSSA) formLCSSARecursively(*OuterL, *DT, LI, SE); else assert(OuterL->isLCSSAForm(*DT) &&