diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h --- a/llvm/include/llvm/Analysis/LoopNestAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h @@ -59,6 +59,12 @@ /// getMaxPerfectDepth(Loop_i) would return 2. static unsigned getMaxPerfectDepth(const Loop &Root, ScalarEvolution &SE); + /// Recursivelly traverse all empty 'single successor' basic blocks of \p From + /// (if there are any). Return the last basic block found or \p End if it was + /// reached during the search. + static const BasicBlock &skipEmptyBlockUntil(const BasicBlock *From, + const BasicBlock *End); + /// Return the outermost loop in the loop nest. Loop &getOutermostLoop() const { return *Loops.front(); } @@ -128,6 +134,12 @@ [](const Loop *L) { return L->isLoopSimplifyForm(); }); } + /// Return true if all loops in the loop nest are in rotated form. + bool areAllLoopsRotatedForm() const { + return std::all_of(Loops.begin(), Loops.end(), + [](const Loop *L) { return L->isRotatedForm(); }); + } + StringRef getName() const { return Loops.front()->getName(); } protected: diff --git a/llvm/lib/Analysis/LoopNestAnalysis.cpp b/llvm/lib/Analysis/LoopNestAnalysis.cpp --- a/llvm/lib/Analysis/LoopNestAnalysis.cpp +++ b/llvm/lib/Analysis/LoopNestAnalysis.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; @@ -206,6 +207,31 @@ return CurrentDepth; } +const BasicBlock &LoopNest::skipEmptyBlockUntil(const BasicBlock *From, + const BasicBlock *End) { + assert(From && "Expecting valid From"); + assert(End && "Expecting valid End"); + + if (From == End || !From->getSingleSuccessor()) + return *From; + + auto IsEmpty = [](const BasicBlock *BB) { + return (BB->getInstList().size() == 1); + }; + + // Visited is used to avoid running into an infinite loop. + SmallPtrSet Visited; + const BasicBlock *BB = From->getSingleSuccessor(); + const BasicBlock *PredBB = BB; + while (BB && BB != End && IsEmpty(BB) && !Visited.count(BB)) { + Visited.insert(BB); + PredBB = BB; + BB = BB->getSingleSuccessor(); + } + + return (BB == End) ? *End : *PredBB; +} + static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop, ScalarEvolution &SE) { // The inner loop must be the only outer loop's child. @@ -253,49 +279,67 @@ // Ensure the only branch that may exist between the loops is the inner loop // guard. if (OuterLoopHeader != InnerLoopPreHeader) { - const BranchInst *BI = - dyn_cast(OuterLoopHeader->getTerminator()); - - if (!BI || BI != InnerLoop.getLoopGuardBranch()) - return false; - - bool InnerLoopExitContainsLCSSA = ContainsLCSSAPhi(*InnerLoopExit); - - // The successors of the inner loop guard should be the inner loop - // preheader and the outer loop latch. - for (const BasicBlock *Succ : BI->successors()) { - if (Succ == InnerLoopPreHeader) - continue; - if (Succ == OuterLoopLatch) - continue; - - // If `InnerLoopExit` contains LCSSA Phi instructions, additional block - // may be inserted before the `OuterLoopLatch` to which `BI` jumps. The - // loops are still considered perfectly nested if the extra block only - // contains Phi instructions from InnerLoopExit and OuterLoopHeader. - if (InnerLoopExitContainsLCSSA && IsExtraPhiBlock(*Succ) && - Succ->getSingleSuccessor() == OuterLoopLatch) { - // Points to the extra block so that we can reference it later in the - // final check. We can also conclude that the inner loop is - // guarded and there exists LCSSA Phi node in the exit block later if we - // see a non-null `ExtraPhiBlock`. - ExtraPhiBlock = Succ; - continue; - } + const BasicBlock &SingleSucc = + LoopNest::skipEmptyBlockUntil(OuterLoopHeader, InnerLoopPreHeader); - DEBUG_WITH_TYPE(VerboseDebug, { - dbgs() << "Inner loop guard successor " << Succ->getName() - << " doesn't lead to inner loop preheader or " - "outer loop latch.\n"; - }); - return false; + // no conditional branch present + if (&SingleSucc != InnerLoopPreHeader) { + const BranchInst *BI = dyn_cast(SingleSucc.getTerminator()); + + if (!BI || BI != InnerLoop.getLoopGuardBranch()) + return false; + + bool InnerLoopExitContainsLCSSA = ContainsLCSSAPhi(*InnerLoopExit); + + // The successors of the inner loop guard should be the inner loop + // preheader or the outer loop latch possibly through empty blocks. + for (const BasicBlock *Succ : BI->successors()) { + const BasicBlock *PotentialInnerPreHeader = Succ; + const BasicBlock *PotentialOuterLatch = Succ; + + // Ensure the inner loop guard successor is empty before skipping + // blocks. + if (Succ->getInstList().size() == 1) { + PotentialInnerPreHeader = + &LoopNest::skipEmptyBlockUntil(Succ, InnerLoopPreHeader); + PotentialOuterLatch = + &LoopNest::skipEmptyBlockUntil(Succ, OuterLoopLatch); + } + + if (PotentialInnerPreHeader == InnerLoopPreHeader) + continue; + if (PotentialOuterLatch == OuterLoopLatch) + continue; + + // If `InnerLoopExit` contains LCSSA Phi instructions, additional block + // may be inserted before the `OuterLoopLatch` to which `BI` jumps. The + // loops are still considered perfectly nested if the extra block only + // contains Phi instructions from InnerLoopExit and OuterLoopHeader. + if (InnerLoopExitContainsLCSSA && IsExtraPhiBlock(*Succ) && + Succ->getSingleSuccessor() == OuterLoopLatch) { + // Points to the extra block so that we can reference it later in the + // final check. We can also conclude that the inner loop is + // guarded and there exists LCSSA Phi node in the exit block later if + // we see a non-null `ExtraPhiBlock`. + ExtraPhiBlock = Succ; + continue; + } + + DEBUG_WITH_TYPE(VerboseDebug, { + dbgs() << "Inner loop guard successor " << Succ->getName() + << " doesn't lead to inner loop preheader or " + "outer loop latch.\n"; + }); + return false; + } } } - // Ensure the inner loop exit block leads to the outer loop latch. - const BasicBlock *SuccInner = InnerLoopExit->getSingleSuccessor(); - if (!SuccInner || - (SuccInner != OuterLoopLatch && SuccInner != ExtraPhiBlock)) { + // Ensure the inner loop exit block lead to the outer loop latch possibly + // through empty blocks. + const BasicBlock &SuccInner = + LoopNest::skipEmptyBlockUntil(InnerLoop.getExitBlock(), OuterLoopLatch); + if (&SuccInner != OuterLoopLatch && &SuccInner != ExtraPhiBlock) { DEBUG_WITH_TYPE( VerboseDebug, dbgs() << "Inner loop exit block " << *InnerLoopExit diff --git a/llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll b/llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll --- a/llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll +++ b/llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll @@ -85,6 +85,55 @@ ret void } +define void @perf_nest_2D_3(i32** %y, i32** %x, i64 signext %nx, i64 signext %ny) { +; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_2D_3_loop_j, Loops: ( perf_nest_2D_3_loop_j ) +; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_2D_3_loop_i, Loops: ( perf_nest_2D_3_loop_i perf_nest_2D_3_loop_j ) +entry: + br label %perf_nest_2D_3_loop_i + +perf_nest_2D_3_loop_i: + %i = phi i64 [ 0, %entry ], [ %inc13, %inc_i ] + %cmp21 = icmp slt i64 0, %ny + br label %singleSucc + +singleSucc: + br i1 %cmp21, label %preheader.j, label %for.end + +preheader.j: + br label %perf_nest_2D_3_loop_j + +perf_nest_2D_3_loop_j: + %j = phi i64 [ 0, %preheader.j ], [ %inc, %inc_j ] + %arrayidx = getelementptr inbounds i32*, i32** %x, i64 %j + %0 = load i32*, i32** %arrayidx, align 8 + %arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %j + %1 = load i32, i32* %arrayidx6, align 4 + %arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %j + %2 = load i32*, i32** %arrayidx8, align 8 + %arrayidx11 = getelementptr inbounds i32, i32* %2, i64 %i + store i32 %1, i32* %arrayidx11, align 4 + br label %inc_j + +inc_j: + %inc = add nsw i64 %j, 1 + %cmp2 = icmp slt i64 %inc, %ny + br i1 %cmp2, label %perf_nest_2D_3_loop_j, label %for.exit + +for.exit: + br label %for.end + +for.end: + br label %inc_i + +inc_i: + %inc13 = add nsw i64 %i, 1 + %cmp = icmp slt i64 %inc13, %nx + br i1 %cmp, label %perf_nest_2D_3_loop_i, label %perf_nest_2D_3_loop_i_end + +perf_nest_2D_3_loop_i_end: + ret void +} + ; Test a perfect 3-dim loop nest of the form: ; for (i=0; i