Index: llvm/lib/Analysis/LoopNestAnalysis.cpp =================================================================== --- llvm/lib/Analysis/LoopNestAnalysis.cpp +++ llvm/lib/Analysis/LoopNestAnalysis.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/LoopNestAnalysis.h" #include "llvm/ADT/BreadthFirstIterator.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ValueTracking.h" @@ -70,20 +71,13 @@ return false; } - // Bail out if we cannot retrieve the outer loop bounds. - auto OuterLoopLB = OuterLoop.getBounds(SE); - if (OuterLoopLB == None) { - LLVM_DEBUG(dbgs() << "Cannot compute loop bounds of OuterLoop: " - << OuterLoop << "\n";); - return false; - } - // Identify the outer loop latch comparison instruction. const BasicBlock *Latch = OuterLoop.getLoopLatch(); assert(Latch && "Expecting a valid loop latch"); const BranchInst *BI = dyn_cast(Latch->getTerminator()); - assert(BI && BI->isConditional() && - "Expecting loop latch terminator to be a branch instruction"); + assert( + BI && BI->isConditional() && + "Expecting loop latch terminator to be a conditional branch instruction"); const CmpInst *OuterLoopLatchCmp = dyn_cast(BI->getCondition()); DEBUG_WITH_TYPE( @@ -103,6 +97,12 @@ << "\n"; }); + InductionDescriptor ID; + SmallVector StepInsts; + for (PHINode &PHI : OuterLoop.getHeader()->phis()) + if (InductionDescriptor::isInductionPHI(&PHI, &OuterLoop, &SE, ID)) + StepInsts.push_back(ID.getInductionBinOp()); + // Determine whether instructions in a basic block are one of: // - the inner loop guard comparison // - the outer loop latch comparison @@ -120,12 +120,20 @@ return false; } - // The only binary instruction allowed is the outer loop step instruction, - // the only comparison instructions allowed are the inner loop guard - // compare instruction and the outer loop latch compare instruction. - if ((isa(I) && &I != &OuterLoopLB->getStepInst()) || + // The only binary instruction allowed is the outer loop step + // instruction, the only comparison instructions allowed are the inner + // loop guard compare instruction and the outer loop latch compare + // instruction. For binary operations, if both of the operands are + // invariants of the inner loop, the corresponding operation can be + // considered in the inner loop. The loop is still perfectly nested. + if ((isa(I) && + std::find(StepInsts.begin(), StepInsts.end(), &I) == + StepInsts.end() && + !(InnerLoop.isLoopInvariant(I.getOperand(0)) && + InnerLoop.isLoopInvariant(I.getOperand(1)))) || (isa(I) && &I != OuterLoopLatchCmp && - &I != InnerLoopGuardCmp)) { + &I != InnerLoopGuardCmp) || + isa(I)) { DEBUG_WITH_TYPE(VerboseDebug, { dbgs() << "Instruction: " << I << "\nin basic block:" << BB << "is unsafe.\n"; @@ -251,12 +259,13 @@ const BasicBlock *ExtraPhiBlock = nullptr; // Ensure the only branch that may exist between the loops is the inner loop - // guard. + // guard. Or the branch jumps to the preheader of inner loop. if (OuterLoopHeader != InnerLoopPreHeader) { const BranchInst *BI = dyn_cast(OuterLoopHeader->getTerminator()); - if (!BI || BI != InnerLoop.getLoopGuardBranch()) + if (!BI || (BI != InnerLoop.getLoopGuardBranch() && + BI->getSuccessor(0) != InnerLoopPreHeader)) return false; bool InnerLoopExitContainsLCSSA = ContainsLCSSAPhi(*InnerLoopExit); @@ -294,12 +303,13 @@ // Ensure the inner loop exit block leads to the outer loop latch. const BasicBlock *SuccInner = InnerLoopExit->getSingleSuccessor(); - if (!SuccInner || - (SuccInner != OuterLoopLatch && SuccInner != ExtraPhiBlock)) { + if ((!SuccInner || + (SuccInner != OuterLoopLatch && SuccInner != ExtraPhiBlock)) && + InnerLoopExit != OuterLoopLatch) { DEBUG_WITH_TYPE( VerboseDebug, dbgs() << "Inner loop exit block " << *InnerLoopExit - << " does not directly lead to the outer loop latch.\n";); + << " does not directly lead/equal to the outer loop latch.\n";); return false; } Index: llvm/lib/Transforms/Scalar/LoopInterchange.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopNestAnalysis.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -346,9 +347,6 @@ } private: - bool tightlyNested(Loop *Outer, Loop *Inner); - bool containsUnsafeInstructions(BasicBlock *BB); - /// Discover induction and reduction PHIs in the header of \p L. Induction /// PHIs are added to \p Inductions, reductions are added to /// OuterInnerReductions. When the outer loop is passed, the inner loop needs @@ -578,51 +576,6 @@ } // end anonymous namespace -bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) { - return any_of(*BB, [](const Instruction &I) { - return I.mayHaveSideEffects() || I.mayReadFromMemory(); - }); -} - -bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { - BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); - BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); - BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); - - LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n"); - - // A perfectly nested loop will not have any branch in between the outer and - // inner block i.e. outer header will branch to either inner preheader and - // outerloop latch. - BranchInst *OuterLoopHeaderBI = - dyn_cast(OuterLoopHeader->getTerminator()); - if (!OuterLoopHeaderBI) - return false; - - for (BasicBlock *Succ : successors(OuterLoopHeaderBI)) - if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() && - Succ != OuterLoopLatch) - return false; - - LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n"); - // We do not have any basic block in between now make sure the outer header - // and outer loop latch doesn't contain any unsafe instructions. - if (containsUnsafeInstructions(OuterLoopHeader) || - containsUnsafeInstructions(OuterLoopLatch)) - return false; - - // Also make sure the inner loop preheader does not contain any unsafe - // instructions. Note that all instructions in the preheader will be moved to - // the outer loop header when interchanging. - if (InnerLoopPreHeader != OuterLoopHeader && - containsUnsafeInstructions(InnerLoopPreHeader)) - return false; - - LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n"); - // We have a perfect loop nest. - return true; -} - bool LoopInterchangeLegality::isLoopStructureUnderstood( PHINode *InnerInduction) { unsigned Num = InnerInduction->getNumOperands(); @@ -932,11 +885,11 @@ continue; // The incoming value is defined in the outer loop latch. Currently we - // only support that in case the outer loop latch has a single predecessor. - // This guarantees that the outer loop latch is executed if and only if - // the inner loop is executed (because tightlyNested() guarantees that the - // outer loop header only branches to the inner loop or the outer loop - // latch). + // only support that in case the outer loop latch has a single + // predecessor. This guarantees that the outer loop latch is executed if + // and only if the inner loop is executed (because arePerfectlyNested() + // guarantees that the outer loop header only branches to the inner loop + // or the outer loop latch). // FIXME: We could weaken this logic and allow multiple predecessors, // if the values are produced outside the loop latch. We would need // additional logic to update the PHI nodes in the exit block as @@ -990,18 +943,25 @@ return false; } - // Check if the loops are tightly nested. - if (!tightlyNested(OuterLoop, InnerLoop)) { - LLVM_DEBUG(dbgs() << "Loops not tightly nested\n"); - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Cannot interchange loops because they are not tightly " - "nested."; - }); - return false; - } + // Check if the loops are perfectly nested. + Loop *LoopInner = InnerLoop; + Loop *LoopOuter = nullptr; + LoopNest LN(*OuterLoop, *SE); + do { + LoopOuter = LoopInner->getParentLoop(); + if (!LN.arePerfectlyNested(*LoopOuter, *LoopInner, *SE)) { + LLVM_DEBUG(dbgs() << "Loops not perfectly nested\n"); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotPerfectlyNested", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Cannot interchange loops because they are not perfectly " + "nested."; + }); + return false; + } + LoopInner = LoopOuter; + } while (LoopOuter != OuterLoop); if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop, OuterInnerReductions)) { Index: llvm/test/Transforms/LoopInterchange/imperfectly-nested.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopInterchange/imperfectly-nested.ll @@ -0,0 +1,68 @@ +; REQUIRES: asserts +; RUN: opt < %s -basicaa -loop-interchange -S -debug 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +@a = common dso_local local_unnamed_addr global i8 0, align 1 +@b = common dso_local local_unnamed_addr global [1 x [2 x i32]] zeroinitializer, align 4 +@c = common dso_local local_unnamed_addr global [1 x [9 x i8]] zeroinitializer, align 1 + +;; The following case is not perfectly nested, +;; should not be interchanged. +;; +;; char a; +;; int b[][2]; +;; char c[][9]; +;; int h() { +;; for (; f <= 2; f++) { +;; for (int j = 0; j <= 2; j++) { +;; b[j][1] = c[j][f]; +;; } +;; if (a) +;; } +;; return 0; +;; } + +; CHECK: Not perfectly nested: invalid loop structure. +; CHECK: Loops not perfectly nested +; CHECK: Not interchanging loops. Cannot prove legality. + +define i32 @h() { +outer.preheader: + %0 = load i8, i8* @a, align 1 + %tobool.i = icmp eq i8 %0, 0 + br label %outer.header + +outer.header: ; preds = %if.end.i, %outer.preheader + %indvars.iv4.i = phi i64 [ 0, %outer.preheader ], [ %indvars.iv.next5.i, %if.end.i ] + br label %inner.header + +inner.header: ; preds = %inner.header, %outer.header + %indvars.iv.i = phi i64 [ 0, %outer.header ], [ %indvars.iv.next.i, %inner.header ] + %arrayidx5.i = getelementptr inbounds [1 x [9 x i8]], [1 x [9 x i8]]* @c, i64 0, i64 %indvars.iv.i, i64 %indvars.iv4.i + %1 = load i8, i8* %arrayidx5.i, align 1 + %conv.i = zext i8 %1 to i32 + %arrayidx8.i = getelementptr inbounds [1 x [2 x i32]], [1 x [2 x i32]]* @b, i64 0, i64 %indvars.iv.i, i64 1 + store i32 %conv.i, i32* %arrayidx8.i, align 4 + %indvars.iv.next.i = add nuw nsw i64 %indvars.iv.i, 1 + %exitcond.i = icmp eq i64 %indvars.iv.next.i, 3 + br i1 %exitcond.i, label %for.cond.cleanup.i, label %inner.header + +for.cond.cleanup.i: ; preds = %inner.header + br i1 %tobool.i, label %if.end.i, label %if.then.i + +if.then.i: ; preds = %for.cond.cleanup.i + br label %if.end.i + +if.end.i: ; preds = %if.then.i, %for.cond.cleanup.i + %indvars.iv.next5.i = add nuw nsw i64 %indvars.iv4.i, 1 + %exitcond6.i = icmp eq i64 %indvars.iv.next5.i, 3 + br i1 %exitcond6.i, label %outer.exit, label %outer.header + +outer.exit: ; preds = %if.end.i + br label %h.exit + +h.exit: ; preds = %outer.exit + ret i32 0 +} Index: llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll =================================================================== --- llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll +++ llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll @@ -251,8 +251,8 @@ ; DELIN: --- !Missed ; DELIN-NEXT: Pass: loop-interchange -; DELIN-NEXT: Name: NotTightlyNested +; DELIN-NEXT: Name: NotPerfectlyNested ; DELIN-NEXT: Function: test04 ; DELIN-NEXT: Args: -; DELIN-NEXT: - String: Cannot interchange loops because they are not tightly nested. +; DELIN-NEXT: - String: Cannot interchange loops because they are not perfectly nested. ; DELIN-NEXT: ...