@@ -766,8 +766,9 @@ static BasicBlock *buildClonedLoopBlocks(
766
766
ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
767
767
BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
768
768
const SmallPtrSetImpl<BasicBlock *> &SkippedLoopAndExitBlocks,
769
- ValueToValueMapTy &VMap, AssumptionCache &AC, DominatorTree &DT,
770
- LoopInfo &LI) {
769
+ ValueToValueMapTy &VMap,
770
+ SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
771
+ DominatorTree &DT, LoopInfo &LI) {
771
772
SmallVector<BasicBlock *, 4 > NewBlocks;
772
773
NewBlocks.reserve (L.getNumBlocks () + ExitBlocks.size ());
773
774
@@ -782,10 +783,6 @@ static BasicBlock *buildClonedLoopBlocks(
782
783
NewBlocks.push_back (NewBB);
783
784
VMap[OldBB] = NewBB;
784
785
785
- // Add the block to the domtree. We'll move it to the correct position
786
- // below.
787
- DT.addNewBlock (NewBB, SplitBB);
788
-
789
786
return NewBB;
790
787
};
791
788
@@ -824,17 +821,6 @@ static BasicBlock *buildClonedLoopBlocks(
824
821
assert (ClonedExitBB->getTerminator ()->getSuccessor (0 ) == MergeBB &&
825
822
" Cloned exit block has the wrong successor!" );
826
823
827
- // Move the merge block's idom to be the split point as one exit is
828
- // dominated by one header, and the other by another, so we know the split
829
- // point dominates both. While the dominator tree isn't fully accurate, we
830
- // want sub-trees within the original loop to be correctly reflect
831
- // dominance within that original loop (at least) and that requires moving
832
- // the merge block out of that subtree.
833
- // FIXME: This is very brittle as we essentially have a partial contract on
834
- // the dominator tree. We really need to instead update it and keep it
835
- // valid or stop relying on it.
836
- DT.changeImmediateDominator (MergeBB, SplitBB);
837
-
838
824
// Remap any cloned instructions and create a merge phi node for them.
839
825
for (auto ZippedInsts : llvm::zip_first (
840
826
llvm::make_range (ExitBB->begin (), std::prev (ExitBB->end ())),
@@ -896,6 +882,11 @@ static BasicBlock *buildClonedLoopBlocks(
896
882
for (PHINode &PN : ClonedSuccBB->phis ())
897
883
PN.removeIncomingValue (LoopBB, /* DeletePHIIfEmpty*/ false );
898
884
885
+ // Record the domtree updates for the new blocks.
886
+ for (auto *ClonedBB : NewBlocks)
887
+ for (auto *SuccBB : successors (ClonedBB))
888
+ DTUpdates.push_back ({DominatorTree::Insert, ClonedBB, SuccBB});
889
+
899
890
return ClonedPH;
900
891
}
901
892
@@ -1217,32 +1208,18 @@ static Loop *buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1217
1208
return ClonedL;
1218
1209
}
1219
1210
1220
- static void deleteDeadBlocksFromLoop (Loop &L, BasicBlock *DeadSubtreeRoot,
1221
- SmallVectorImpl<BasicBlock *> &ExitBlocks,
1222
- DominatorTree &DT, LoopInfo &LI) {
1223
- // Walk the dominator tree to build up the set of blocks we will delete here.
1224
- // The order is designed to allow us to always delete bottom-up and avoid any
1225
- // dangling uses.
1226
- SmallSetVector<BasicBlock *, 16 > DeadBlocks;
1227
- DeadBlocks.insert (DeadSubtreeRoot);
1228
- for (int i = 0 ; i < (int )DeadBlocks.size (); ++i)
1229
- for (DomTreeNode *ChildN : *DT[DeadBlocks[i]]) {
1230
- // FIXME: This assert should pass and that means we don't change nearly
1231
- // as much below! Consider rewriting all of this to avoid deleting
1232
- // blocks. They are always cloned before being deleted, and so instead
1233
- // could just be moved.
1234
- // FIXME: This in turn means that we might actually be more able to
1235
- // update the domtree.
1236
- assert ((L.contains (ChildN->getBlock ()) ||
1237
- llvm::find (ExitBlocks, ChildN->getBlock ()) != ExitBlocks.end ()) &&
1238
- " Should never reach beyond the loop and exits when deleting!" );
1239
- DeadBlocks.insert (ChildN->getBlock ());
1240
- }
1211
+ static void
1212
+ deleteDeadBlocksFromLoop (Loop &L,
1213
+ const SmallVectorImpl<BasicBlock *> &DeadBlocks,
1214
+ SmallVectorImpl<BasicBlock *> &ExitBlocks,
1215
+ DominatorTree &DT, LoopInfo &LI) {
1216
+ SmallPtrSet<BasicBlock *, 16 > DeadBlockSet (DeadBlocks.begin (),
1217
+ DeadBlocks.end ());
1241
1218
1242
1219
// Filter out the dead blocks from the exit blocks list so that it can be
1243
1220
// used in the caller.
1244
1221
llvm::erase_if (ExitBlocks,
1245
- [&](BasicBlock *BB) { return DeadBlocks .count (BB); });
1222
+ [&](BasicBlock *BB) { return DeadBlockSet .count (BB); });
1246
1223
1247
1224
// Remove these blocks from their successors.
1248
1225
for (auto *BB : DeadBlocks)
@@ -1254,38 +1231,39 @@ static void deleteDeadBlocksFromLoop(Loop &L, BasicBlock *DeadSubtreeRoot,
1254
1231
for (auto *BB : DeadBlocks)
1255
1232
ParentL->getBlocksSet ().erase (BB);
1256
1233
llvm::erase_if (ParentL->getBlocksVector (),
1257
- [&](BasicBlock *BB) { return DeadBlocks .count (BB); });
1234
+ [&](BasicBlock *BB) { return DeadBlockSet .count (BB); });
1258
1235
}
1259
1236
1260
1237
// Now delete the dead child loops. This raw delete will clear them
1261
1238
// recursively.
1262
1239
llvm::erase_if (L.getSubLoopsVector (), [&](Loop *ChildL) {
1263
- if (!DeadBlocks .count (ChildL->getHeader ()))
1240
+ if (!DeadBlockSet .count (ChildL->getHeader ()))
1264
1241
return false ;
1265
1242
1266
1243
assert (llvm::all_of (ChildL->blocks (),
1267
1244
[&](BasicBlock *ChildBB) {
1268
- return DeadBlocks .count (ChildBB);
1245
+ return DeadBlockSet .count (ChildBB);
1269
1246
}) &&
1270
1247
" If the child loop header is dead all blocks in the child loop must "
1271
1248
" be dead as well!" );
1272
1249
LI.destroy (ChildL);
1273
1250
return true ;
1274
1251
});
1275
1252
1276
- // Remove the mappings for the dead blocks.
1277
- for (auto *BB : DeadBlocks)
1253
+ // Remove the loop mappings for the dead blocks and drop all the references
1254
+ // from these blocks to others to handle cyclic references as we start
1255
+ // deleting the blocks themselves.
1256
+ for (auto *BB : DeadBlocks) {
1257
+ // Check that the dominator tree has already been updated.
1258
+ assert (!DT.getNode (BB) && " Should already have cleared domtree!" );
1278
1259
LI.changeLoopFor (BB, nullptr );
1279
-
1280
- // Drop all the references from these blocks to others to handle cyclic
1281
- // references as we start deleting the blocks themselves.
1282
- for (auto *BB : DeadBlocks)
1283
1260
BB->dropAllReferences ();
1261
+ }
1284
1262
1285
- for (auto *BB : llvm::reverse (DeadBlocks)) {
1286
- DT.eraseNode (BB);
1263
+ // Actually delete the blocks now that they've been fully unhooked from the
1264
+ // IR.
1265
+ for (auto *BB : DeadBlocks)
1287
1266
BB->eraseFromParent ();
1288
- }
1289
1267
}
1290
1268
1291
1269
// / Recompute the set of blocks in a loop after unswitching.
@@ -1737,17 +1715,13 @@ static bool unswitchInvariantBranch(
1737
1715
// Keep a mapping for the cloned values.
1738
1716
ValueToValueMapTy VMap;
1739
1717
1718
+ // Keep track of the dominator tree updates needed.
1719
+ SmallVector<DominatorTree::UpdateType, 4 > DTUpdates;
1720
+
1740
1721
// Build the cloned blocks from the loop.
1741
1722
auto *ClonedPH = buildClonedLoopBlocks (
1742
1723
L, LoopPH, SplitBB, ExitBlocks, ParentBB, UnswitchedSuccBB,
1743
- ContinueSuccBB, SkippedLoopAndExitBlocks, VMap, AC, DT, LI);
1744
-
1745
- // Build the cloned loop structure itself. This may be substantially
1746
- // different from the original structure due to the simplified CFG. This also
1747
- // handles inserting all the cloned blocks into the correct loops.
1748
- SmallVector<Loop *, 4 > NonChildClonedLoops;
1749
- Loop *ClonedL =
1750
- buildClonedLoops (L, ExitBlocks, VMap, LI, NonChildClonedLoops);
1724
+ ContinueSuccBB, SkippedLoopAndExitBlocks, VMap, DTUpdates, AC, DT, LI);
1751
1725
1752
1726
// Remove the parent as a predecessor of the unswitched successor.
1753
1727
UnswitchedSuccBB->removePredecessor (ParentBB, /* DontDeleteUselessPHIs*/ true );
@@ -1763,23 +1737,57 @@ static bool unswitchInvariantBranch(
1763
1737
// the one cloned).
1764
1738
BranchInst::Create (ContinueSuccBB, ParentBB);
1765
1739
1740
+ // Before we update the dominator tree, collect the dead blocks if we're going
1741
+ // to end up deleting the unswitched successor.
1742
+ SmallVector<BasicBlock *, 16 > DeadBlocks;
1743
+ if (DeleteUnswitchedSucc) {
1744
+ DeadBlocks.push_back (UnswitchedSuccBB);
1745
+ for (int i = 0 ; i < (int )DeadBlocks.size (); ++i) {
1746
+ // If we reach an exit block, stop recursing as the unswitched loop will
1747
+ // end up reaching the merge block which we make the successor of the
1748
+ // exit.
1749
+ if (ExitBlockSet.count (DeadBlocks[i]))
1750
+ continue ;
1751
+
1752
+ // Insert the children that are within the loop or exit block set. Other
1753
+ // children may reach out of the loop. While we don't expect these to be
1754
+ // dead (as the unswitched clone should reach them) we don't try to prove
1755
+ // that here.
1756
+ for (DomTreeNode *ChildN : *DT[DeadBlocks[i]])
1757
+ if (L.contains (ChildN->getBlock ()) ||
1758
+ ExitBlockSet.count (ChildN->getBlock ()))
1759
+ DeadBlocks.push_back (ChildN->getBlock ());
1760
+ }
1761
+ }
1762
+
1763
+ // Add the remaining edges to our updates and apply them to get an up-to-date
1764
+ // dominator tree. Note that this will cause the dead blocks above to be
1765
+ // unreachable and no longer in the dominator tree.
1766
+ DTUpdates.push_back ({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
1767
+ DTUpdates.push_back ({DominatorTree::Insert, SplitBB, ClonedPH});
1768
+ DT.applyUpdates (DTUpdates);
1769
+
1770
+ // Build the cloned loop structure itself. This may be substantially
1771
+ // different from the original structure due to the simplified CFG. This also
1772
+ // handles inserting all the cloned blocks into the correct loops.
1773
+ SmallVector<Loop *, 4 > NonChildClonedLoops;
1774
+ Loop *ClonedL =
1775
+ buildClonedLoops (L, ExitBlocks, VMap, LI, NonChildClonedLoops);
1776
+
1766
1777
// Delete anything that was made dead in the original loop due to
1767
1778
// unswitching.
1768
- if (DeleteUnswitchedSucc )
1769
- deleteDeadBlocksFromLoop (L, UnswitchedSuccBB , ExitBlocks, DT, LI);
1779
+ if (!DeadBlocks. empty () )
1780
+ deleteDeadBlocksFromLoop (L, DeadBlocks , ExitBlocks, DT, LI);
1770
1781
1771
1782
SmallVector<Loop *, 4 > HoistedLoops;
1772
1783
bool IsStillLoop = rebuildLoopAfterUnswitch (L, ExitBlocks, LI, HoistedLoops);
1773
1784
1774
- // This will have completely invalidated the dominator tree. We can't easily
1775
- // bound how much is invalid because in some cases we will refine the
1776
- // predecessor set of exit blocks of the loop which can move large unrelated
1777
- // regions of code into a new subtree.
1778
- //
1779
- // FIXME: Eventually, we should use an incremental update utility that
1780
- // leverages the existing information in the dominator tree (and potentially
1781
- // the nature of the change) to more efficiently update things.
1782
- DT.recalculate (*SplitBB->getParent ());
1785
+ // This transformation has a high risk of corrupting the dominator tree, and
1786
+ // the below steps to rebuild loop structures will result in hard to debug
1787
+ // errors in that case so verify that the dominator tree is sane first.
1788
+ // FIXME: Remove this when the bugs stop showing up and rely on existing
1789
+ // verification steps.
1790
+ assert (DT.verify (DominatorTree::VerificationLevel::Fast));
1783
1791
1784
1792
// We can change which blocks are exit blocks of all the cloned sibling
1785
1793
// loops, the current loop, and any parent loops which shared exit blocks
0 commit comments