Skip to content

Commit 69e68f8

Browse files
committedApr 25, 2018
[PM/LoopUnswitch] Begin teaching SimpleLoopUnswitch to use the new
update API for dominators rather than doing manual, hacky updates. This is just the first step, but in some ways the most important as it moves the non-trivial unswitching to update the domtree rather than fully recalculating it each time. Subsequent patches should remove the custom update logic used by the trivial unswitch and replace it with uses of the update API. This also fixes a number of bugs I was seeing when testing non-trivial unswitch due to it querying the quasi-correct dominator tree. Now the tree is 100% correct and safe to query. That said, there are still more bugs I can see with non-trivial unswitch just running over the test suite, so more bugfix patches are needed as well. Thanks to both Sanjoy and Fedor for reviews and testing! Differential Revision: https://reviews.llvm.org/D45943 llvm-svn: 330787
1 parent 8832f88 commit 69e68f8

File tree

1 file changed

+78
-70
lines changed

1 file changed

+78
-70
lines changed
 

‎llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

+78-70
Original file line numberDiff line numberDiff line change
@@ -766,8 +766,9 @@ static BasicBlock *buildClonedLoopBlocks(
766766
ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
767767
BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
768768
const SmallPtrSetImpl<BasicBlock *> &SkippedLoopAndExitBlocks,
769-
ValueToValueMapTy &VMap, AssumptionCache &AC, DominatorTree &DT,
770-
LoopInfo &LI) {
769+
ValueToValueMapTy &VMap,
770+
SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
771+
DominatorTree &DT, LoopInfo &LI) {
771772
SmallVector<BasicBlock *, 4> NewBlocks;
772773
NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
773774

@@ -782,10 +783,6 @@ static BasicBlock *buildClonedLoopBlocks(
782783
NewBlocks.push_back(NewBB);
783784
VMap[OldBB] = NewBB;
784785

785-
// Add the block to the domtree. We'll move it to the correct position
786-
// below.
787-
DT.addNewBlock(NewBB, SplitBB);
788-
789786
return NewBB;
790787
};
791788

@@ -824,17 +821,6 @@ static BasicBlock *buildClonedLoopBlocks(
824821
assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
825822
"Cloned exit block has the wrong successor!");
826823

827-
// Move the merge block's idom to be the split point as one exit is
828-
// dominated by one header, and the other by another, so we know the split
829-
// point dominates both. While the dominator tree isn't fully accurate, we
830-
// want sub-trees within the original loop to be correctly reflect
831-
// dominance within that original loop (at least) and that requires moving
832-
// the merge block out of that subtree.
833-
// FIXME: This is very brittle as we essentially have a partial contract on
834-
// the dominator tree. We really need to instead update it and keep it
835-
// valid or stop relying on it.
836-
DT.changeImmediateDominator(MergeBB, SplitBB);
837-
838824
// Remap any cloned instructions and create a merge phi node for them.
839825
for (auto ZippedInsts : llvm::zip_first(
840826
llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
@@ -896,6 +882,11 @@ static BasicBlock *buildClonedLoopBlocks(
896882
for (PHINode &PN : ClonedSuccBB->phis())
897883
PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
898884

885+
// Record the domtree updates for the new blocks.
886+
for (auto *ClonedBB : NewBlocks)
887+
for (auto *SuccBB : successors(ClonedBB))
888+
DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
889+
899890
return ClonedPH;
900891
}
901892

@@ -1217,32 +1208,18 @@ static Loop *buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
12171208
return ClonedL;
12181209
}
12191210

1220-
static void deleteDeadBlocksFromLoop(Loop &L, BasicBlock *DeadSubtreeRoot,
1221-
SmallVectorImpl<BasicBlock *> &ExitBlocks,
1222-
DominatorTree &DT, LoopInfo &LI) {
1223-
// Walk the dominator tree to build up the set of blocks we will delete here.
1224-
// The order is designed to allow us to always delete bottom-up and avoid any
1225-
// dangling uses.
1226-
SmallSetVector<BasicBlock *, 16> DeadBlocks;
1227-
DeadBlocks.insert(DeadSubtreeRoot);
1228-
for (int i = 0; i < (int)DeadBlocks.size(); ++i)
1229-
for (DomTreeNode *ChildN : *DT[DeadBlocks[i]]) {
1230-
// FIXME: This assert should pass and that means we don't change nearly
1231-
// as much below! Consider rewriting all of this to avoid deleting
1232-
// blocks. They are always cloned before being deleted, and so instead
1233-
// could just be moved.
1234-
// FIXME: This in turn means that we might actually be more able to
1235-
// update the domtree.
1236-
assert((L.contains(ChildN->getBlock()) ||
1237-
llvm::find(ExitBlocks, ChildN->getBlock()) != ExitBlocks.end()) &&
1238-
"Should never reach beyond the loop and exits when deleting!");
1239-
DeadBlocks.insert(ChildN->getBlock());
1240-
}
1211+
static void
1212+
deleteDeadBlocksFromLoop(Loop &L,
1213+
const SmallVectorImpl<BasicBlock *> &DeadBlocks,
1214+
SmallVectorImpl<BasicBlock *> &ExitBlocks,
1215+
DominatorTree &DT, LoopInfo &LI) {
1216+
SmallPtrSet<BasicBlock *, 16> DeadBlockSet(DeadBlocks.begin(),
1217+
DeadBlocks.end());
12411218

12421219
// Filter out the dead blocks from the exit blocks list so that it can be
12431220
// used in the caller.
12441221
llvm::erase_if(ExitBlocks,
1245-
[&](BasicBlock *BB) { return DeadBlocks.count(BB); });
1222+
[&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
12461223

12471224
// Remove these blocks from their successors.
12481225
for (auto *BB : DeadBlocks)
@@ -1254,38 +1231,39 @@ static void deleteDeadBlocksFromLoop(Loop &L, BasicBlock *DeadSubtreeRoot,
12541231
for (auto *BB : DeadBlocks)
12551232
ParentL->getBlocksSet().erase(BB);
12561233
llvm::erase_if(ParentL->getBlocksVector(),
1257-
[&](BasicBlock *BB) { return DeadBlocks.count(BB); });
1234+
[&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
12581235
}
12591236

12601237
// Now delete the dead child loops. This raw delete will clear them
12611238
// recursively.
12621239
llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1263-
if (!DeadBlocks.count(ChildL->getHeader()))
1240+
if (!DeadBlockSet.count(ChildL->getHeader()))
12641241
return false;
12651242

12661243
assert(llvm::all_of(ChildL->blocks(),
12671244
[&](BasicBlock *ChildBB) {
1268-
return DeadBlocks.count(ChildBB);
1245+
return DeadBlockSet.count(ChildBB);
12691246
}) &&
12701247
"If the child loop header is dead all blocks in the child loop must "
12711248
"be dead as well!");
12721249
LI.destroy(ChildL);
12731250
return true;
12741251
});
12751252

1276-
// Remove the mappings for the dead blocks.
1277-
for (auto *BB : DeadBlocks)
1253+
// Remove the loop mappings for the dead blocks and drop all the references
1254+
// from these blocks to others to handle cyclic references as we start
1255+
// deleting the blocks themselves.
1256+
for (auto *BB : DeadBlocks) {
1257+
// Check that the dominator tree has already been updated.
1258+
assert(!DT.getNode(BB) && "Should already have cleared domtree!");
12781259
LI.changeLoopFor(BB, nullptr);
1279-
1280-
// Drop all the references from these blocks to others to handle cyclic
1281-
// references as we start deleting the blocks themselves.
1282-
for (auto *BB : DeadBlocks)
12831260
BB->dropAllReferences();
1261+
}
12841262

1285-
for (auto *BB : llvm::reverse(DeadBlocks)) {
1286-
DT.eraseNode(BB);
1263+
// Actually delete the blocks now that they've been fully unhooked from the
1264+
// IR.
1265+
for (auto *BB : DeadBlocks)
12871266
BB->eraseFromParent();
1288-
}
12891267
}
12901268

12911269
/// Recompute the set of blocks in a loop after unswitching.
@@ -1737,17 +1715,13 @@ static bool unswitchInvariantBranch(
17371715
// Keep a mapping for the cloned values.
17381716
ValueToValueMapTy VMap;
17391717

1718+
// Keep track of the dominator tree updates needed.
1719+
SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
1720+
17401721
// Build the cloned blocks from the loop.
17411722
auto *ClonedPH = buildClonedLoopBlocks(
17421723
L, LoopPH, SplitBB, ExitBlocks, ParentBB, UnswitchedSuccBB,
1743-
ContinueSuccBB, SkippedLoopAndExitBlocks, VMap, AC, DT, LI);
1744-
1745-
// Build the cloned loop structure itself. This may be substantially
1746-
// different from the original structure due to the simplified CFG. This also
1747-
// handles inserting all the cloned blocks into the correct loops.
1748-
SmallVector<Loop *, 4> NonChildClonedLoops;
1749-
Loop *ClonedL =
1750-
buildClonedLoops(L, ExitBlocks, VMap, LI, NonChildClonedLoops);
1724+
ContinueSuccBB, SkippedLoopAndExitBlocks, VMap, DTUpdates, AC, DT, LI);
17511725

17521726
// Remove the parent as a predecessor of the unswitched successor.
17531727
UnswitchedSuccBB->removePredecessor(ParentBB, /*DontDeleteUselessPHIs*/ true);
@@ -1763,23 +1737,57 @@ static bool unswitchInvariantBranch(
17631737
// the one cloned).
17641738
BranchInst::Create(ContinueSuccBB, ParentBB);
17651739

1740+
// Before we update the dominator tree, collect the dead blocks if we're going
1741+
// to end up deleting the unswitched successor.
1742+
SmallVector<BasicBlock *, 16> DeadBlocks;
1743+
if (DeleteUnswitchedSucc) {
1744+
DeadBlocks.push_back(UnswitchedSuccBB);
1745+
for (int i = 0; i < (int)DeadBlocks.size(); ++i) {
1746+
// If we reach an exit block, stop recursing as the unswitched loop will
1747+
// end up reaching the merge block which we make the successor of the
1748+
// exit.
1749+
if (ExitBlockSet.count(DeadBlocks[i]))
1750+
continue;
1751+
1752+
// Insert the children that are within the loop or exit block set. Other
1753+
// children may reach out of the loop. While we don't expect these to be
1754+
// dead (as the unswitched clone should reach them) we don't try to prove
1755+
// that here.
1756+
for (DomTreeNode *ChildN : *DT[DeadBlocks[i]])
1757+
if (L.contains(ChildN->getBlock()) ||
1758+
ExitBlockSet.count(ChildN->getBlock()))
1759+
DeadBlocks.push_back(ChildN->getBlock());
1760+
}
1761+
}
1762+
1763+
// Add the remaining edges to our updates and apply them to get an up-to-date
1764+
// dominator tree. Note that this will cause the dead blocks above to be
1765+
// unreachable and no longer in the dominator tree.
1766+
DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
1767+
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
1768+
DT.applyUpdates(DTUpdates);
1769+
1770+
// Build the cloned loop structure itself. This may be substantially
1771+
// different from the original structure due to the simplified CFG. This also
1772+
// handles inserting all the cloned blocks into the correct loops.
1773+
SmallVector<Loop *, 4> NonChildClonedLoops;
1774+
Loop *ClonedL =
1775+
buildClonedLoops(L, ExitBlocks, VMap, LI, NonChildClonedLoops);
1776+
17661777
// Delete anything that was made dead in the original loop due to
17671778
// unswitching.
1768-
if (DeleteUnswitchedSucc)
1769-
deleteDeadBlocksFromLoop(L, UnswitchedSuccBB, ExitBlocks, DT, LI);
1779+
if (!DeadBlocks.empty())
1780+
deleteDeadBlocksFromLoop(L, DeadBlocks, ExitBlocks, DT, LI);
17701781

17711782
SmallVector<Loop *, 4> HoistedLoops;
17721783
bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
17731784

1774-
// This will have completely invalidated the dominator tree. We can't easily
1775-
// bound how much is invalid because in some cases we will refine the
1776-
// predecessor set of exit blocks of the loop which can move large unrelated
1777-
// regions of code into a new subtree.
1778-
//
1779-
// FIXME: Eventually, we should use an incremental update utility that
1780-
// leverages the existing information in the dominator tree (and potentially
1781-
// the nature of the change) to more efficiently update things.
1782-
DT.recalculate(*SplitBB->getParent());
1785+
// This transformation has a high risk of corrupting the dominator tree, and
1786+
// the below steps to rebuild loop structures will result in hard to debug
1787+
// errors in that case so verify that the dominator tree is sane first.
1788+
// FIXME: Remove this when the bugs stop showing up and rely on existing
1789+
// verification steps.
1790+
assert(DT.verify(DominatorTree::VerificationLevel::Fast));
17831791

17841792
// We can change which blocks are exit blocks of all the cloned sibling
17851793
// loops, the current loop, and any parent loops which shared exit blocks

0 commit comments

Comments
 (0)