Index: lib/Transforms/Scalar/LoopInterchange.cpp =================================================================== --- lib/Transforms/Scalar/LoopInterchange.cpp +++ lib/Transforms/Scalar/LoopInterchange.cpp @@ -400,7 +400,12 @@ bool InnerLoopContainsReductions) : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LoopExit(LoopNestExit), - InnerLoopHasReduction(InnerLoopContainsReductions) {} + InnerLoopHasReduction(InnerLoopContainsReductions), DTUpdates() {} + + ~LoopInterchangeTransform() { + DT->applyUpdates(DTUpdates); + DTUpdates.clear(); + } /// Interchange OuterLoop and InnerLoop. bool transform(); @@ -426,6 +431,7 @@ DominatorTree *DT; BasicBlock *LoopExit; bool InnerLoopHasReduction; + std::vector DTUpdates; }; // Main LoopInterchange Pass. @@ -453,6 +459,8 @@ AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addRequired(); + + AU.addPreserved(); } bool runOnFunction(Function &F) override { @@ -462,8 +470,7 @@ SE = &getAnalysis().getSE(); LI = &getAnalysis().getLoopInfo(); DI = &getAnalysis().getDI(); - auto *DTWP = getAnalysisIfAvailable(); - DT = DTWP ? &DTWP->getDomTree() : nullptr; + DT = &getAnalysis().getDomTree(); ORE = &getAnalysis().getORE(); PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); @@ -573,7 +580,6 @@ // Update the DependencyMatrix interChangeDependencies(DependencyMatrix, i, i - 1); - DT->recalculate(F); #ifdef DUMP_DEP_MATRICIES DEBUG(dbgs() << "Dependence after interchange\n"); printDepMatrix(DependencyMatrix); @@ -1265,6 +1271,24 @@ } } +/// \brief Retarget every successor of BI that is OldBB to NewBB. For each +/// retargeted successor, records the edge insert/delete pair in DTUpdates. 
+static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, + BasicBlock *NewBB, + std::vector &DTUpdates) { + unsigned NumSucc = BI->getNumSuccessors(); + for (unsigned i = 0; i < NumSucc; ++i) { + if (BI->getSuccessor(i) == OldBB) { + BI->setSuccessor(i, NewBB); + + DTUpdates.push_back( + {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB}); + DTUpdates.push_back( + {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB}); + } + } +} + bool LoopInterchangeTransform::adjustLoopBranches() { DEBUG(dbgs() << "adjustLoopBranches called\n"); // Adjust the loop preheader @@ -1306,27 +1330,18 @@ return false; // Adjust Loop Preheader and headers - - unsigned NumSucc = OuterLoopPredecessorBI->getNumSuccessors(); - for (unsigned i = 0; i < NumSucc; ++i) { - if (OuterLoopPredecessorBI->getSuccessor(i) == OuterLoopPreHeader) - OuterLoopPredecessorBI->setSuccessor(i, InnerLoopPreHeader); - } - - NumSucc = OuterLoopHeaderBI->getNumSuccessors(); - for (unsigned i = 0; i < NumSucc; ++i) { - if (OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch) - OuterLoopHeaderBI->setSuccessor(i, LoopExit); - else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader) - OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSuccessor); - } + updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader, + InnerLoopPreHeader, DTUpdates); + updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates); + updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader, + InnerLoopHeaderSuccessor, DTUpdates); // Adjust reduction PHI's now that the incoming block has changed. 
updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader, OuterLoopHeader); - BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI); - InnerLoopHeaderBI->eraseFromParent(); + updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor, + OuterLoopPreHeader, DTUpdates); // -------------Adjust loop latches----------- if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader) @@ -1334,11 +1349,8 @@ else InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0); - NumSucc = InnerLoopLatchPredecessorBI->getNumSuccessors(); - for (unsigned i = 0; i < NumSucc; ++i) { - if (InnerLoopLatchPredecessorBI->getSuccessor(i) == InnerLoopLatch) - InnerLoopLatchPredecessorBI->setSuccessor(i, InnerLoopLatchSuccessor); - } + updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch, + InnerLoopLatchSuccessor, DTUpdates); // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of PHI with // the value and remove this PHI node from inner loop. @@ -1358,19 +1370,13 @@ else OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0); - if (InnerLoopLatchBI->getSuccessor(1) == InnerLoopLatchSuccessor) - InnerLoopLatchBI->setSuccessor(1, OuterLoopLatchSuccessor); - else - InnerLoopLatchBI->setSuccessor(0, OuterLoopLatchSuccessor); + updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor, + OuterLoopLatchSuccessor, DTUpdates); + updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch, + DTUpdates); updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch); - if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopLatchSuccessor) { - OuterLoopLatchBI->setSuccessor(0, InnerLoopLatch); - } else { - OuterLoopLatchBI->setSuccessor(1, InnerLoopLatch); - } - return true; } Index: test/Transforms/LoopInterchange/call-instructions.ll =================================================================== --- test/Transforms/LoopInterchange/call-instructions.ll +++ test/Transforms/LoopInterchange/call-instructions.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s 
-basicaa -loop-interchange -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s ;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch. target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Transforms/LoopInterchange/currentLimitation.ll =================================================================== --- test/Transforms/LoopInterchange/currentLimitation.ll +++ test/Transforms/LoopInterchange/currentLimitation.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s ;; These are test that fail to interchange due to current limitation. This will go off once we extend the loop interchange pass. target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll =================================================================== --- test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll +++ test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s ;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch. 
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll =================================================================== --- test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll +++ test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s @A10 = local_unnamed_addr global [3 x [3 x i32]] zeroinitializer, align 16 Index: test/Transforms/LoopInterchange/interchange-output-dependencies.ll =================================================================== --- test/Transforms/LoopInterchange/interchange-output-dependencies.ll +++ test/Transforms/LoopInterchange/interchange-output-dependencies.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s ;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch. target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Transforms/LoopInterchange/interchange-simple-count-down.ll =================================================================== --- test/Transforms/LoopInterchange/interchange-simple-count-down.ll +++ test/Transforms/LoopInterchange/interchange-simple-count-down.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s ;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch. 
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Transforms/LoopInterchange/interchange-simple-count-up.ll =================================================================== --- test/Transforms/LoopInterchange/interchange-simple-count-up.ll +++ test/Transforms/LoopInterchange/interchange-simple-count-up.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s ;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch. target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll =================================================================== --- test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll +++ test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s ;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch. target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Transforms/LoopInterchange/phi-ordering.ll =================================================================== --- test/Transforms/LoopInterchange/phi-ordering.ll +++ test/Transforms/LoopInterchange/phi-ordering.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-interchange -S | FileCheck %s +; RUN: opt < %s -loop-interchange -verify-dom-info -S | FileCheck %s ;; Checks the order of the inner phi nodes does not cause havoc. ;; The inner loop has a reduction into c. The IV is not the first phi. 
Index: test/Transforms/LoopInterchange/profitability.ll =================================================================== --- test/Transforms/LoopInterchange/profitability.ll +++ test/Transforms/LoopInterchange/profitability.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s ;; We test profitability model in these test cases. target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Transforms/LoopInterchange/reductions.ll =================================================================== --- test/Transforms/LoopInterchange/reductions.ll +++ test/Transforms/LoopInterchange/reductions.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s @A = common global [500 x [500 x i32]] zeroinitializer @X = common global i32 0