diff --git a/llvm/lib/Analysis/LoopNestAnalysis.cpp b/llvm/lib/Analysis/LoopNestAnalysis.cpp --- a/llvm/lib/Analysis/LoopNestAnalysis.cpp +++ b/llvm/lib/Analysis/LoopNestAnalysis.cpp @@ -206,6 +206,32 @@ return CurrentDepth; } +/// Recursivelly traverse all empty 'single successor' basic blocks of \p From +/// (if there are any). Return the last basic block found or \p End if it was +/// reached during the search. +const BasicBlock &skipEmptyBlockUntil(const BasicBlock *From, + const BasicBlock *End) { + assert((From != nullptr && End != nullptr) && "Expecting valid arguments"); + + if (From == End || !From->getSingleSuccessor()) + return *From; + + auto isEmpty = [](const BasicBlock *BB) { + return (BB->getInstList().size() == 1); + }; + + SmallPtrSet Visited; + const BasicBlock *BB = From->getSingleSuccessor(); + const BasicBlock *PredBB = BB; + while (BB && BB != End && isEmpty(BB) && Visited.count(BB) == 0) { + Visited.insert(BB); + PredBB = BB; + BB = BB->getSingleSuccessor(); + } + + return (BB == End) ? *End : *PredBB; +} + static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop, ScalarEvolution &SE) { // The inner loop must be the only outer loop's child. @@ -253,49 +279,55 @@ // Ensure the only branch that may exist between the loops is the inner loop // guard. if (OuterLoopHeader != InnerLoopPreHeader) { - const BranchInst *BI = - dyn_cast(OuterLoopHeader->getTerminator()); - - if (!BI || BI != InnerLoop.getLoopGuardBranch()) - return false; - - bool InnerLoopExitContainsLCSSA = ContainsLCSSAPhi(*InnerLoopExit); - - // The successors of the inner loop guard should be the inner loop - // preheader and the outer loop latch. - for (const BasicBlock *Succ : BI->successors()) { - if (Succ == InnerLoopPreHeader) - continue; - if (Succ == OuterLoopLatch) - continue; - - // If `InnerLoopExit` contains LCSSA Phi instructions, additional block - // may be inserted before the `OuterLoopLatch` to which `BI` jumps. The - // loops are still considered perfectly nested if the extra block only - // contains Phi instructions from InnerLoopExit and OuterLoopHeader. - if (InnerLoopExitContainsLCSSA && IsExtraPhiBlock(*Succ) && - Succ->getSingleSuccessor() == OuterLoopLatch) { - // Points to the extra block so that we can reference it later in the - // final check. We can also conclude that the inner loop is - // guarded and there exists LCSSA Phi node in the exit block later if we - // see a non-null `ExtraPhiBlock`. - ExtraPhiBlock = Succ; - continue; - } + const BasicBlock &SingleSucc = + skipEmptyBlockUntil(OuterLoopHeader, InnerLoopPreHeader); - DEBUG_WITH_TYPE(VerboseDebug, { - dbgs() << "Inner loop guard successor " << Succ->getName() - << " doesn't lead to inner loop preheader or " - "outer loop latch.\n"; - }); - return false; + // no conditional branch present + if (&SingleSucc != InnerLoopPreHeader) { + const BranchInst *BI = dyn_cast(SingleSucc.getTerminator()); + + if (!BI || BI != InnerLoop.getLoopGuardBranch()) + return false; + + bool InnerLoopExitContainsLCSSA = ContainsLCSSAPhi(*InnerLoopExit); + + // The successors of the inner loop guard should be the inner loop + // preheader and the outer loop latch. + for (const BasicBlock *Succ : BI->successors()) { + if (Succ == InnerLoopPreHeader) + continue; + if (Succ == OuterLoopLatch) + continue; + + // If `InnerLoopExit` contains LCSSA Phi instructions, additional block + // may be inserted before the `OuterLoopLatch` to which `BI` jumps. The + // loops are still considered perfectly nested if the extra block only + // contains Phi instructions from InnerLoopExit and OuterLoopHeader. + if (InnerLoopExitContainsLCSSA && IsExtraPhiBlock(*Succ) && + Succ->getSingleSuccessor() == OuterLoopLatch) { + // Points to the extra block so that we can reference it later in the + // final check. We can also conclude that the inner loop is + // guarded and there exists LCSSA Phi node in the exit block later if + // we see a non-null `ExtraPhiBlock`. + ExtraPhiBlock = Succ; + continue; + } + + DEBUG_WITH_TYPE(VerboseDebug, { + dbgs() << "Inner loop guard successor " << Succ->getName() + << " doesn't lead to inner loop preheader or " + "outer loop latch.\n"; + }); + return false; + } } } - // Ensure the inner loop exit block leads to the outer loop latch. - const BasicBlock *SuccInner = InnerLoopExit->getSingleSuccessor(); - if (!SuccInner || - (SuccInner != OuterLoopLatch && SuccInner != ExtraPhiBlock)) { + // Ensure the inner loop exit block lead to the outer loop latch possibly + // through empty blocks. + const BasicBlock &SuccInner = + skipEmptyBlockUntil(InnerLoop.getExitBlock(), OuterLoopLatch); + if (&SuccInner != OuterLoopLatch && &SuccInner != ExtraPhiBlock) { DEBUG_WITH_TYPE( VerboseDebug, dbgs() << "Inner loop exit block " << *InnerLoopExit diff --git a/llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll b/llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll --- a/llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll +++ b/llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll @@ -85,6 +85,49 @@ ret void } +define void @perf_nest_2D_3(i32** %y, i32** %x, i64 signext %nx, i64 signext %ny) { +; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_2D_3_loop_j, Loops: ( perf_nest_2D_3_loop_j ) +; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_2D_3_loop_i, Loops: ( perf_nest_2D_3_loop_i perf_nest_2D_3_loop_j ) +entry: + br label %perf_nest_2D_3_loop_i + +perf_nest_2D_3_loop_i: + %i = phi i64 [ 0, %entry ], [ %inc13, %inc_i ] + %cmp21 = icmp slt i64 0, %ny + br label %singleSucc + +singleSucc: + br i1 %cmp21, label %perf_nest_2D_3_loop_j, label %inc_i + +perf_nest_2D_3_loop_j: + %j = phi i64 [ 0, %singleSucc ], [ %inc, %inc_j ] + %arrayidx = getelementptr inbounds i32*, i32** %x, i64 %j + %0 = load i32*, i32** %arrayidx, align 8 + %arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %j + %1 = load i32, i32* %arrayidx6, align 4 + %arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %j + %2 = load i32*, i32** %arrayidx8, align 8 + %arrayidx11 = getelementptr inbounds i32, i32* %2, i64 %i + store i32 %1, i32* %arrayidx11, align 4 + br label %inc_j + +inc_j: + %inc = add nsw i64 %j, 1 + %cmp2 = icmp slt i64 %inc, %ny + br i1 %cmp2, label %perf_nest_2D_3_loop_j, label %for.end + +for.end: + br label %inc_i + +inc_i: + %inc13 = add nsw i64 %i, 1 + %cmp = icmp slt i64 %inc13, %nx + br i1 %cmp, label %perf_nest_2D_3_loop_i, label %perf_nest_2D_3_loop_i_end + +perf_nest_2D_3_loop_i_end: + ret void +} + ; Test a perfect 3-dim loop nest of the form: ; for (i=0; i