diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h --- a/llvm/include/llvm/Analysis/LoopNestAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h @@ -128,6 +128,12 @@ [](const Loop *L) { return L->isLoopSimplifyForm(); }); } + /// Return true if all loops in the loop nest are in rotated form. + bool areAllLoopsRotatedForm() const { + return std::all_of(Loops.begin(), Loops.end(), + [](const Loop *L) { return L->isRotatedForm(); }); + } + StringRef getName() const { return Loops.front()->getName(); } protected: diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h --- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -244,6 +244,12 @@ const CriticalEdgeSplittingOptions &Options = CriticalEdgeSplittingOptions()); +/// Recursivelly traverse all empty 'single successor' basic blocks of \p From +/// (if there are any). Return the last basic block found or \p End if it was +/// reached during the search. +const BasicBlock &skipEmptyBlockUntil(const BasicBlock *From, + const BasicBlock *End); + /// Split the edge connecting the specified blocks, and return the newly created /// basic block between \p From and \p To. BasicBlock *SplitEdge(BasicBlock *From, BasicBlock *To, diff --git a/llvm/lib/Analysis/LoopNestAnalysis.cpp b/llvm/lib/Analysis/LoopNestAnalysis.cpp --- a/llvm/lib/Analysis/LoopNestAnalysis.cpp +++ b/llvm/lib/Analysis/LoopNestAnalysis.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; @@ -253,49 +254,66 @@ // Ensure the only branch that may exist between the loops is the inner loop // guard. if (OuterLoopHeader != InnerLoopPreHeader) { - const BranchInst *BI = - dyn_cast(OuterLoopHeader->getTerminator()); - - if (!BI || BI != InnerLoop.getLoopGuardBranch()) - return false; - - bool InnerLoopExitContainsLCSSA = ContainsLCSSAPhi(*InnerLoopExit); - - // The successors of the inner loop guard should be the inner loop - // preheader and the outer loop latch. - for (const BasicBlock *Succ : BI->successors()) { - if (Succ == InnerLoopPreHeader) - continue; - if (Succ == OuterLoopLatch) - continue; - - // If `InnerLoopExit` contains LCSSA Phi instructions, additional block - // may be inserted before the `OuterLoopLatch` to which `BI` jumps. The - // loops are still considered perfectly nested if the extra block only - // contains Phi instructions from InnerLoopExit and OuterLoopHeader. - if (InnerLoopExitContainsLCSSA && IsExtraPhiBlock(*Succ) && - Succ->getSingleSuccessor() == OuterLoopLatch) { - // Points to the extra block so that we can reference it later in the - // final check. We can also conclude that the inner loop is - // guarded and there exists LCSSA Phi node in the exit block later if we - // see a non-null `ExtraPhiBlock`. - ExtraPhiBlock = Succ; - continue; - } + const BasicBlock &SingleSucc = + skipEmptyBlockUntil(OuterLoopHeader, InnerLoopPreHeader); - DEBUG_WITH_TYPE(VerboseDebug, { - dbgs() << "Inner loop guard successor " << Succ->getName() - << " doesn't lead to inner loop preheader or " - "outer loop latch.\n"; - }); - return false; + // no conditional branch present + if (&SingleSucc != InnerLoopPreHeader) { + const BranchInst *BI = dyn_cast(SingleSucc.getTerminator()); + + if (!BI || BI != InnerLoop.getLoopGuardBranch()) + return false; + + bool InnerLoopExitContainsLCSSA = ContainsLCSSAPhi(*InnerLoopExit); + + // The successors of the inner loop guard should be the inner loop + // preheader or the outer loop latch possibly through empty blocks. + for (const BasicBlock *Succ : BI->successors()) { + const BasicBlock *PotentialInnerPreHeader = Succ; + const BasicBlock *PotentialOuterLatch = Succ; + + // Ensure the inner loop guard successor is empty before skipping + // blocks. + if (Succ->getInstList().size() == 1) { + PotentialInnerPreHeader = + &skipEmptyBlockUntil(Succ, InnerLoopPreHeader); + PotentialOuterLatch = &skipEmptyBlockUntil(Succ, OuterLoopLatch); + } + + if (PotentialInnerPreHeader == InnerLoopPreHeader) + continue; + if (PotentialOuterLatch == OuterLoopLatch) + continue; + + // If `InnerLoopExit` contains LCSSA Phi instructions, additional block + // may be inserted before the `OuterLoopLatch` to which `BI` jumps. The + // loops are still considered perfectly nested if the extra block only + // contains Phi instructions from InnerLoopExit and OuterLoopHeader. + if (InnerLoopExitContainsLCSSA && IsExtraPhiBlock(*Succ) && + Succ->getSingleSuccessor() == OuterLoopLatch) { + // Points to the extra block so that we can reference it later in the + // final check. We can also conclude that the inner loop is + // guarded and there exists LCSSA Phi node in the exit block later if + // we see a non-null `ExtraPhiBlock`. + ExtraPhiBlock = Succ; + continue; + } + + DEBUG_WITH_TYPE(VerboseDebug, { + dbgs() << "Inner loop guard successor " << Succ->getName() + << " doesn't lead to inner loop preheader or " + "outer loop latch.\n"; + }); + return false; + } } } - // Ensure the inner loop exit block leads to the outer loop latch. - const BasicBlock *SuccInner = InnerLoopExit->getSingleSuccessor(); - if (!SuccInner || - (SuccInner != OuterLoopLatch && SuccInner != ExtraPhiBlock)) { + // Ensure the inner loop exit block lead to the outer loop latch possibly + // through empty blocks. + const BasicBlock &SuccInner = + skipEmptyBlockUntil(InnerLoop.getExitBlock(), OuterLoopLatch); + if (&SuccInner != OuterLoopLatch && &SuccInner != ExtraPhiBlock) { DEBUG_WITH_TYPE( VerboseDebug, dbgs() << "Inner loop exit block " << *InnerLoopExit diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -492,6 +492,30 @@ ReplaceInstWithInst(From->getParent()->getInstList(), BI, To); } +const BasicBlock &llvm::skipEmptyBlockUntil(const BasicBlock *From, + const BasicBlock *End) { + assert(From && "Expecting valid From"); + assert(End && "Expecting valid End"); + + if (From == End || !From->getSingleSuccessor()) + return *From; + + auto IsEmpty = [](const BasicBlock *BB) { + return (BB->getInstList().size() == 1); + }; + + SmallPtrSet Visited; + const BasicBlock *BB = From->getSingleSuccessor(); + const BasicBlock *PredBB = BB; + while (BB && BB != End && IsEmpty(BB) && !Visited.count(BB)) { + Visited.insert(BB); + PredBB = BB; + BB = BB->getSingleSuccessor(); + } + + return (BB == End) ? *End : *PredBB; +} + BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU) { unsigned SuccNum = GetSuccessorNumber(BB, Succ); diff --git a/llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll b/llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll --- a/llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll +++ b/llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll @@ -85,6 +85,55 @@ ret void } +define void @perf_nest_2D_3(i32** %y, i32** %x, i64 signext %nx, i64 signext %ny) { +; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_2D_3_loop_j, Loops: ( perf_nest_2D_3_loop_j ) +; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_2D_3_loop_i, Loops: ( perf_nest_2D_3_loop_i perf_nest_2D_3_loop_j ) +entry: + br label %perf_nest_2D_3_loop_i + +perf_nest_2D_3_loop_i: + %i = phi i64 [ 0, %entry ], [ %inc13, %inc_i ] + %cmp21 = icmp slt i64 0, %ny + br label %singleSucc + +singleSucc: + br i1 %cmp21, label %preheader.j, label %for.end + +preheader.j: + br label %perf_nest_2D_3_loop_j + +perf_nest_2D_3_loop_j: + %j = phi i64 [ 0, %preheader.j ], [ %inc, %inc_j ] + %arrayidx = getelementptr inbounds i32*, i32** %x, i64 %j + %0 = load i32*, i32** %arrayidx, align 8 + %arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %j + %1 = load i32, i32* %arrayidx6, align 4 + %arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %j + %2 = load i32*, i32** %arrayidx8, align 8 + %arrayidx11 = getelementptr inbounds i32, i32* %2, i64 %i + store i32 %1, i32* %arrayidx11, align 4 + br label %inc_j + +inc_j: + %inc = add nsw i64 %j, 1 + %cmp2 = icmp slt i64 %inc, %ny + br i1 %cmp2, label %perf_nest_2D_3_loop_j, label %for.exit + +for.exit: + br label %for.end + +for.end: + br label %inc_i + +inc_i: + %inc13 = add nsw i64 %i, 1 + %cmp = icmp slt i64 %inc13, %nx + br i1 %cmp, label %perf_nest_2D_3_loop_i, label %perf_nest_2D_3_loop_i_end + +perf_nest_2D_3_loop_i_end: + ret void +} + ; Test a perfect 3-dim loop nest of the form: ; for (i=0; i