Index: lib/Transforms/Scalar/LoopInterchange.cpp =================================================================== --- lib/Transforms/Scalar/LoopInterchange.cpp +++ lib/Transforms/Scalar/LoopInterchange.cpp @@ -402,7 +402,9 @@ /// Interchange OuterLoop and InnerLoop. bool transform(); - void restructureLoops(Loop *InnerLoop, Loop *OuterLoop); + void restructureLoops(Loop *NewInner, Loop *NewOuter, + BasicBlock *OrigInnerPreHeader, + BasicBlock *OrigOuterPreHeader); void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop); private: @@ -453,6 +455,7 @@ AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); } bool runOnFunction(Function &F) override { @@ -1153,23 +1156,77 @@ llvm_unreachable("Couldn't find loop"); } -void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop, - Loop *OuterLoop) { +/// Update LoopInfo after interchanging. NewInner and NewOuter refer to the +/// new inner and outer loop after interchanging: NewInner is the original +/// outer loop and NewOuter is the original inner loop. +/// +/// Before interchanging, we have the following structure: +/// Outer preheader +/// Outer header +/// Inner preheader +/// Inner header +/// Inner body +/// Inner latch +/// outer bbs +/// Outer latch +/// +/// After interchanging: +/// Inner preheader +/// Inner header +/// Outer preheader +/// Outer header +/// Inner body +/// outer bbs +/// Outer latch +/// Inner latch +void LoopInterchangeTransform::restructureLoops( + Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader, + BasicBlock *OrigOuterPreHeader) { Loop *OuterLoopParent = OuterLoop->getParentLoop(); + // The original inner loop preheader moves from the new inner loop to + // the parent loop, if there is one. + NewInner->removeBlockFromLoop(OrigInnerPreHeader); + LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent); + + // Switch the loop levels. if (OuterLoopParent) { // Remove the loop from its parent loop. 
- removeChildLoop(OuterLoopParent, OuterLoop); - removeChildLoop(OuterLoop, InnerLoop); - OuterLoopParent->addChildLoop(InnerLoop); + removeChildLoop(OuterLoopParent, NewInner); + removeChildLoop(NewInner, NewOuter); + OuterLoopParent->addChildLoop(NewOuter); } else { - removeChildLoop(OuterLoop, InnerLoop); - LI->changeTopLevelLoop(OuterLoop, InnerLoop); + removeChildLoop(NewInner, NewOuter); + LI->changeTopLevelLoop(NewInner, NewOuter); + } + while (!NewOuter->empty()) + NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin())); + NewOuter->addChildLoop(NewInner); + + // BBs from the original inner loop. + SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks()); + + // Add BBs from the original outer loop to the original inner loop (excluding + // BBs already in inner loop) + for (BasicBlock *BB : NewInner->blocks()) + if (LI->getLoopFor(BB) == NewInner) + NewOuter->addBlockEntry(BB); + + // Now remove the original inner loop's header and latch from the new inner + // loop and move the other BBs (the loop body) to the new inner loop. + BasicBlock *OuterHeader = NewOuter->getHeader(); + BasicBlock *OuterLatch = NewOuter->getLoopLatch(); + for (BasicBlock *BB : OrigInnerBBs) { + // Remove the new outer loop header and latch from the new inner loop. + if (BB == OuterHeader || BB == OuterLatch) + NewInner->removeBlockFromLoop(BB); + else + LI->changeLoopFor(BB, NewInner); } - while (!InnerLoop->empty()) - OuterLoop->addChildLoop(InnerLoop->removeChildLoop(InnerLoop->begin())); - - InnerLoop->addChildLoop(OuterLoop); + // The preheader of the original outer loop becomes part of the new + // outer loop. 
+ NewOuter->addBlockEntry(OrigOuterPreHeader); + LI->changeLoopFor(OrigOuterPreHeader, NewOuter); } bool LoopInterchangeTransform::transform() { @@ -1212,7 +1269,6 @@ return false; } - restructureLoops(InnerLoop, OuterLoop); return true; } @@ -1382,6 +1438,9 @@ updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch); DT->applyUpdates(DTUpdates); + restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader, + OuterLoopPreHeader); + return true; } Index: test/Transforms/LoopInterchange/call-instructions.ll =================================================================== --- test/Transforms/LoopInterchange/call-instructions.ll +++ test/Transforms/LoopInterchange/call-instructions.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t +; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -verify-loop-info -verify-dom-info ; RUN: FileCheck --input-file=%t %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Transforms/LoopInterchange/currentLimitation.ll =================================================================== --- test/Transforms/LoopInterchange/currentLimitation.ll +++ test/Transforms/LoopInterchange/currentLimitation.ll @@ -1,5 +1,6 @@ ; REQUIRES: asserts -; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S -debug 2>&1 | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info \ +; RUN: -S -debug 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll =================================================================== --- test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll +++ test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll @@ -1,5 +1,6 @@ ; REQUIRES: asserts -; RUN: opt < %s -basicaa 
-loop-interchange -verify-dom-info -S -debug 2>&1 | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info \ +; RUN: -S -debug 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll =================================================================== --- test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll +++ test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -basicaa -da-delinearize -loop-interchange -verify-dom-info -S -pass-remarks=loop-interchange 2>&1 | FileCheck %s +; RUN: opt < %s -basicaa -da-delinearize -loop-interchange -verify-dom-info -verify-loop-info \ +; RUN: -S -pass-remarks=loop-interchange 2>&1 | FileCheck %s @A10 = local_unnamed_addr global [3 x [3 x i32]] zeroinitializer, align 16 Index: test/Transforms/LoopInterchange/interchange-no-deps.ll =================================================================== --- test/Transforms/LoopInterchange/interchange-no-deps.ll +++ test/Transforms/LoopInterchange/interchange-no-deps.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -loop-interchange -simplifycfg -S -pass-remarks=loop-interchange 2>&1 | FileCheck %s +; RUN: opt < %s -loop-interchange -verify-dom-info -verify-loop-info -S \ +; RUN: -pass-remarks=loop-interchange 2>&1 | FileCheck %s ; CHECK: Loop interchanged with enclosing loop. ; no_deps_interchange just access a single nested array and can be interchange. 
Index: test/Transforms/LoopInterchange/interchangeable.ll =================================================================== --- test/Transforms/LoopInterchange/interchangeable.ll +++ test/Transforms/LoopInterchange/interchangeable.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll =================================================================== --- test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll +++ test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll @@ -1,7 +1,8 @@ ; Test optimization remarks generated by the LoopInterchange pass. ; -; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-output=%t -pass-remarks-missed='loop-interchange' \ -; RUN: -pass-remarks='loop-interchange' -S +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info \ +; RUN: -pass-remarks-output=%t -pass-remarks-missed='loop-interchange' \ +; RUN: -pass-remarks='loop-interchange' -S ; RUN: cat %t | FileCheck %s @A = common global [100 x [100 x i32]] zeroinitializer Index: test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll =================================================================== --- test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll +++ test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll @@ -1,5 +1,6 @@ ; REQUIRES: asserts -; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S -debug 2>&1 | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info \ +; RUN: -S -debug 2>&1 | FileCheck %s target datalayout = 
"e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll =================================================================== --- test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll +++ test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll @@ -1,5 +1,6 @@ ; REQUIRES: asserts -; RUN: opt < %s -basicaa -loop-interchange -S -debug 2>&1 | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info \ +; RUN: -S -debug 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll =================================================================== --- test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll +++ test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll @@ -1,5 +1,6 @@ ; REQUIRES: asserts -; RUN: opt < %s -basicaa -loop-interchange -S -debug 2>&1 | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info \ +; RUN: -S -debug 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/Transforms/LoopInterchange/phi-ordering.ll =================================================================== --- test/Transforms/LoopInterchange/phi-ordering.ll +++ test/Transforms/LoopInterchange/phi-ordering.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -loop-interchange -verify-dom-info -S 2>&1 | FileCheck %s +; RUN: opt < %s -loop-interchange -verify-dom-info -verify-loop-info -S 2>&1 | FileCheck %s ;; Checks the order of the inner phi nodes does not cause havoc. ;; The inner loop has a reduction into c. The IV is not the first phi. 
Index: test/Transforms/LoopInterchange/profitability.ll =================================================================== --- test/Transforms/LoopInterchange/profitability.ll +++ test/Transforms/LoopInterchange/profitability.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-interchange -pass-remarks-output=%t \ +; RUN: opt < %s -loop-interchange -pass-remarks-output=%t -verify-dom-info -verify-loop-info \ ; RUN: -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange ; RUN: FileCheck -input-file %t %s Index: test/Transforms/LoopInterchange/reductions.ll =================================================================== --- test/Transforms/LoopInterchange/reductions.ll +++ test/Transforms/LoopInterchange/reductions.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S -debug 2>&1 | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info -S -debug 2>&1 | FileCheck %s @A = common global [500 x [500 x i32]] zeroinitializer @X = common global i32 0