Index: lib/Transforms/Scalar/LoopInterchange.cpp
===================================================================
--- lib/Transforms/Scalar/LoopInterchange.cpp
+++ lib/Transforms/Scalar/LoopInterchange.cpp
@@ -402,7 +402,9 @@
 
   /// Interchange OuterLoop and InnerLoop.
   bool transform();
-  void restructureLoops(Loop *InnerLoop, Loop *OuterLoop);
+  void restructureLoops(Loop *InnerLoop, Loop *OuterLoop,
+                        BasicBlock *OrigInnerPreHeader,
+                        BasicBlock *OrigOuterPreHeader);
   void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
 
 private:
@@ -453,6 +455,7 @@
     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
 
     AU.addPreserved<DominatorTreeWrapperPass>();
+    AU.addPreserved<LoopInfoWrapperPass>();
   }
 
   bool runOnFunction(Function &F) override {
@@ -1153,9 +1156,40 @@
   llvm_unreachable("Couldn't find loop");
 }
 
-void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop,
-                                                Loop *OuterLoop) {
+/// Update LoopInfo, after interchanging InnerLoop with OuterLoop.
+///
+/// Before interchanging, we have the following structure
+/// Outer preheader
+///   Outer header
+///     Inner preheader
+///     Inner header
+///       Inner body
+///       Inner latch
+///   outer bbs
+///   Outer latch
+///
+/// After interchanging:
+/// Inner preheader
+/// Inner header
+///   Outer preheader
+///   Outer header
+///     Inner body
+///     outer bbs
+///     Outer latch
+///   Inner latch
+void LoopInterchangeTransform::restructureLoops(
+    Loop *InnerLoop, Loop *OuterLoop, BasicBlock *OrigInnerPreHeader,
+    BasicBlock *OrigOuterPreHeader) {
   Loop *OuterLoopParent = OuterLoop->getParentLoop();
+  // The original inner loop preheader moves from the original outer loop to
+  // the parent loop, if there is one.
+  if (OuterLoopParent) {
+    OuterLoop->removeBlockFromLoop(OrigInnerPreHeader);
+    LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
+  } else
+    LI->removeBlock(OrigInnerPreHeader);
+
+  // Switch the loop levels.
   if (OuterLoopParent) {
     // Remove the loop from its parent loop.
     removeChildLoop(OuterLoopParent, OuterLoop);
@@ -1165,11 +1199,35 @@
     removeChildLoop(OuterLoop, InnerLoop);
     LI->changeTopLevelLoop(OuterLoop, InnerLoop);
   }
-
   while (!InnerLoop->empty())
     OuterLoop->addChildLoop(InnerLoop->removeChildLoop(InnerLoop->begin()));
-
   InnerLoop->addChildLoop(OuterLoop);
+
+  SmallVector<BasicBlock *, 8> InnerBBs(InnerLoop->blocks());
+
+  // Add BBs from the original outer loop to the original inner loop (excluding
+  // BBs already in inner loop)
+  for (BasicBlock *BB : OuterLoop->blocks())
+    if (LI->getLoopFor(BB) == OuterLoop)
+      InnerLoop->addBlockEntry(BB);
+
+  // Now remove inner loop header and latch from outer loop and move other BBs
+  // from the original inner loop to the original outer loop
+  BasicBlock *InnerHeader = InnerLoop->getHeader();
+  BasicBlock *InnerLatch = InnerLoop->getLoopLatch();
+  for (BasicBlock *BB : InnerBBs) {
+    // BBs of the inner loop's former child loops keep their innermost loop.
+    if (LI->getLoopFor(BB) != InnerLoop)
+      continue;
+    // original inner loop header and latch stay in the original inner loop.
+    if (BB == InnerHeader || BB == InnerLatch)
+      OuterLoop->removeBlockFromLoop(BB);
+    else
+      LI->changeLoopFor(BB, OuterLoop);
+  }
+
+  InnerLoop->addBlockEntry(OrigOuterPreHeader);
+  LI->changeLoopFor(OrigOuterPreHeader, InnerLoop);
 }
 
 bool LoopInterchangeTransform::transform() {
@@ -1212,7 +1270,6 @@
     return false;
   }
 
-  restructureLoops(InnerLoop, OuterLoop);
   return true;
 }
 
@@ -1382,6 +1439,9 @@
   updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
 
   DT->applyUpdates(DTUpdates);
+  restructureLoops(InnerLoop, OuterLoop, InnerLoopPreHeader,
+                   OuterLoopPreHeader);
+
   return true;
 }
 
Index: test/Transforms/LoopInterchange/call-instructions.ll
===================================================================
--- test/Transforms/LoopInterchange/call-instructions.ll
+++ test/Transforms/LoopInterchange/call-instructions.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t
+; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -verify-loop-info -verify-dom-info
 ; RUN: FileCheck --input-file=%t %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
Index: test/Transforms/LoopInterchange/interchangeable.ll
===================================================================
--- test/Transforms/LoopInterchange/interchangeable.ll
+++ test/Transforms/LoopInterchange/interchangeable.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s
+; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info -S | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
Index: test/Transforms/LoopInterchange/phi-ordering.ll
===================================================================
--- test/Transforms/LoopInterchange/phi-ordering.ll
+++ test/Transforms/LoopInterchange/phi-ordering.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -loop-interchange -verify-dom-info -S 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-interchange -verify-dom-info -verify-loop-info -S 2>&1 | FileCheck %s
 
 ;; Checks the order of the inner phi nodes does not cause havoc.
 ;; The inner loop has a reduction into c. The IV is not the first phi.
Index: test/Transforms/LoopInterchange/reductions.ll
===================================================================
--- test/Transforms/LoopInterchange/reductions.ll
+++ test/Transforms/LoopInterchange/reductions.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S -debug 2>&1 | FileCheck %s
+; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info -S -debug 2>&1 | FileCheck %s
 
 @A = common global [500 x [500 x i32]] zeroinitializer
 @X = common global i32 0