Index: llvm/include/llvm/Analysis/LoopNestAnalysis.h =================================================================== --- llvm/include/llvm/Analysis/LoopNestAnalysis.h +++ llvm/include/llvm/Analysis/LoopNestAnalysis.h @@ -64,7 +64,8 @@ /// (if there are any). Return the last basic block found or \p End if it was /// reached during the search. static const BasicBlock &skipEmptyBlockUntil(const BasicBlock *From, - const BasicBlock *End); + const BasicBlock *End, + bool UniquePred = false); /// Return the outermost loop in the loop nest. Loop &getOutermostLoop() const { return *Loops.front(); } Index: llvm/lib/Analysis/LoopInfo.cpp =================================================================== --- llvm/lib/Analysis/LoopInfo.cpp +++ llvm/lib/Analysis/LoopInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/LoopInfoImpl.h" #include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopNestAnalysis.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -397,7 +398,16 @@ BasicBlock *GuardOtherSucc = (GuardBI->getSuccessor(0) == Preheader) ? GuardBI->getSuccessor(1) : GuardBI->getSuccessor(0); - return (GuardOtherSucc == ExitFromLatchSucc) ? GuardBI : nullptr; + + // Check if ExitFromLatchSucc (or any BasicBlock which is an empty unique + // successor of ExitFromLatchSucc) is equal to GuardOtherSucc. If + // skipEmptyBlockUntil returns GuardOtherSucc, then the guard branch for the + // loop is GuardBI (return GuardBI), otherwise return nullptr. + if (&LoopNest::skipEmptyBlockUntil(ExitFromLatchSucc, GuardOtherSucc, + /*UniquePred=*/true) == GuardOtherSucc) + return GuardBI; + else + return nullptr; } bool Loop::isCanonical(ScalarEvolution &SE) const { Index: llvm/lib/Analysis/LoopNestAnalysis.cpp =================================================================== --- llvm/lib/Analysis/LoopNestAnalysis.cpp +++ llvm/lib/Analysis/LoopNestAnalysis.cpp @@ -206,7 +206,8 @@ } const BasicBlock &LoopNest::skipEmptyBlockUntil(const BasicBlock *From, - const BasicBlock *End) { + const BasicBlock *End, + bool UniquePred) { assert(From && "Expecting valid From"); assert(End && "Expecting valid End"); @@ -221,7 +222,8 @@ SmallPtrSet Visited; const BasicBlock *BB = From->getUniqueSuccessor(); const BasicBlock *PredBB = BB; - while (BB && BB != End && IsEmpty(BB) && !Visited.count(BB)) { + while (BB && BB != End && IsEmpty(BB) && !Visited.count(BB) && + (!UniquePred || BB->getUniquePredecessor())) { Visited.insert(BB); PredBB = BB; BB = BB->getUniqueSuccessor(); Index: llvm/test/Analysis/LoopNestAnalysis/imperfectnest.ll =================================================================== --- llvm/test/Analysis/LoopNestAnalysis/imperfectnest.ll +++ llvm/test/Analysis/LoopNestAnalysis/imperfectnest.ll @@ -424,70 +424,3 @@ for.end13: ret void } - -; Test an imperfect loop nest of the form: -; for (int i = 0; i < nx; ++i) -; if (i > 5) { // user branch -; for (int j = 1; j <= 5; j+=2) -; y[j][i] = x[i][j] + j; -; } - -define void @imperf_nest_6(i32** %y, i32** %x, i32 signext %nx, i32 signext %ny) { -; CHECK-LABEL: IsPerfect=false, Depth=2, OutermostLoop: imperf_nest_6_loop_i, Loops: ( imperf_nest_6_loop_i imperf_nest_6_loop_j ) -entry: - %cmp2 = icmp slt i32 0, %nx - br i1 %cmp2, label %imperf_nest_6_loop_i.lr.ph, label %for.end13 - -imperf_nest_6_loop_i.lr.ph: - br label %imperf_nest_6_loop_i - -imperf_nest_6_loop_i: - %i.0 = phi i32 [ 0, %imperf_nest_6_loop_i.lr.ph ], [ %inc12, %for.inc11 ] - %cmp1 = icmp sgt i32 %i.0, 5 - br i1 %cmp1, label %imperf_nest_6_loop_j.lr.ph, label %if.end - -imperf_nest_6_loop_j.lr.ph: - br label %imperf_nest_6_loop_j - -imperf_nest_6_loop_j: - %j.0 = phi i32 [ 1, %imperf_nest_6_loop_j.lr.ph ], [ %inc, %for.inc ] - %idxprom = sext i32 %i.0 to i64 - %arrayidx = getelementptr inbounds i32*, i32** %x, i64 %idxprom - %0 = load i32*, i32** %arrayidx, align 8 - %idxprom5 = sext i32 %j.0 to i64 - %arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %idxprom5 - %1 = load i32, i32* %arrayidx6, align 4 - %add = add nsw i32 %1, %j.0 - %idxprom7 = sext i32 %j.0 to i64 - %arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %idxprom7 - %2 = load i32*, i32** %arrayidx8, align 8 - %idxprom9 = sext i32 %i.0 to i64 - %arrayidx10 = getelementptr inbounds i32, i32* %2, i64 %idxprom9 - store i32 %add, i32* %arrayidx10, align 4 - br label %for.inc - -for.inc: - %inc = add nsw i32 %j.0, 2 - %cmp3 = icmp sle i32 %inc, 5 - br i1 %cmp3, label %imperf_nest_6_loop_j, label %for.cond2.for.end_crit_edge - -for.cond2.for.end_crit_edge: - br label %for.end - -for.end: - br label %if.end - -if.end: - br label %for.inc11 - -for.inc11: - %inc12 = add nsw i32 %i.0, 1 - %cmp = icmp slt i32 %inc12, %nx - br i1 %cmp, label %imperf_nest_6_loop_i, label %for.cond.for.end13_crit_edge - -for.cond.for.end13_crit_edge: - br label %for.end13 - -for.end13: - ret void -} Index: llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll =================================================================== --- llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll +++ llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll @@ -322,3 +322,148 @@ %x.addr.0.lcssa = phi i32 [ %split7, %for.cond.for.end7_crit_edge ], [ %x, %entry ] ret i32 %x.addr.0.lcssa } + +; Test a perfect loop nest of the form: +; for (int i = 0; i < nx; ++i) +; if (i < ny) { // guard branch for the j-loop +; for (int j=i; j < ny; j+=1) +; y[j][i] = x[i][j] + j; +; } +define double @perf_nest_guard_branch(i32** %y, i32** %x, i32 signext %nx, i32 signext %ny) { +; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: test6Loop2, Loops: ( test6Loop2 ) +; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: test6Loop1, Loops: ( test6Loop1 test6Loop2 ) +entry: + %cmp2 = icmp slt i32 0, %nx + br i1 %cmp2, label %test6Loop1.lr.ph, label %for.end13 + +test6Loop1.lr.ph: ; preds = %entry + br label %test6Loop1 + +test6Loop1: ; preds = %test6Loop1.lr.ph, %for.inc11 + %i.0 = phi i32 [ 0, %test6Loop1.lr.ph ], [ %inc12, %for.inc11 ] + %cmp1 = icmp slt i32 %i.0, %ny + br i1 %cmp1, label %test6Loop2.lr.ph, label %if.end + +test6Loop2.lr.ph: ; preds = %if.then + br label %test6Loop2 + +test6Loop2: ; preds = %test6Loop2.lr.ph, %for.inc + %j.0 = phi i32 [ %i.0, %test6Loop2.lr.ph ], [ %inc, %for.inc ] + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds i32*, i32** %x, i64 %idxprom + %0 = load i32*, i32** %arrayidx, align 8 + %idxprom5 = sext i32 %j.0 to i64 + %arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %idxprom5 + %1 = load i32, i32* %arrayidx6, align 4 + %add = add nsw i32 %1, %j.0 + %idxprom7 = sext i32 %j.0 to i64 + %arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %idxprom7 + %2 = load i32*, i32** %arrayidx8, align 8 + %idxprom9 = sext i32 %i.0 to i64 + %arrayidx10 = getelementptr inbounds i32, i32* %2, i64 %idxprom9 + store i32 %add, i32* %arrayidx10, align 4 + br label %for.inc + +for.inc: ; preds = %test6Loop2 + %inc = add nsw i32 %j.0, 1 + %cmp3 = icmp slt i32 %inc, %ny + br i1 %cmp3, label %test6Loop2, label %for.cond2.for.end_crit_edge + +for.cond2.for.end_crit_edge: ; preds = %for.inc + br label %for.end + +for.end: ; preds = %for.cond2.for.end_crit_edge, %if.then + br label %if.end + +if.end: ; preds = %for.end, %test6Loop1 + br label %for.inc11 + +for.inc11: ; preds = %if.end + %inc12 = add nsw i32 %i.0, 1 + %cmp = icmp slt i32 %inc12, %nx + br i1 %cmp, label %test6Loop1, label %for.cond.for.end13_crit_edge + +for.cond.for.end13_crit_edge: ; preds = %for.inc11 + br label %for.end13 + +for.end13: ; preds = %for.cond.for.end13_crit_edge, %entry + %arrayidx14 = getelementptr inbounds i32*, i32** %y, i64 0 + %3 = load i32*, i32** %arrayidx14, align 8 + %arrayidx15 = getelementptr inbounds i32, i32* %3, i64 0 + %4 = load i32, i32* %arrayidx15, align 4 + %conv = sitofp i32 %4 to double + ret double %conv +} + +; Test a perfect loop nest of the form: +; for (int i = 0; i < nx; ++i) +; if (i < ny) { // guard branch for the j-loop +; for (int j=i; j < ny; j+=1) +; y[j][i] = x[i][j] + j; +; } + +define double @test6(i32** %y, i32** %x, i32 signext %nx, i32 signext %ny) { +; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: test6Loop2, Loops: ( test6Loop2 ) +; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: test6Loop1, Loops: ( test6Loop1 test6Loop2 ) +entry: + %cmp2 = icmp slt i32 0, %nx + br i1 %cmp2, label %test6Loop1.lr.ph, label %for.end13 + +test6Loop1.lr.ph: ; preds = %entry + br label %test6Loop1 + +test6Loop1: ; preds = %test6Loop1.lr.ph, %for.inc11 + %i.0 = phi i32 [ 0, %test6Loop1.lr.ph ], [ %inc12, %for.inc11 ] + %cmp1 = icmp slt i32 %i.0, %ny + br i1 %cmp1, label %test6Loop2.lr.ph, label %if.end + +test6Loop2.lr.ph: ; preds = %if.then + br label %test6Loop2 + +test6Loop2: ; preds = %test6Loop2.lr.ph, %for.inc + %j.0 = phi i32 [ %i.0, %test6Loop2.lr.ph ], [ %inc, %for.inc ] + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds i32*, i32** %x, i64 %idxprom + %0 = load i32*, i32** %arrayidx, align 8 + %idxprom5 = sext i32 %j.0 to i64 + %arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %idxprom5 + %1 = load i32, i32* %arrayidx6, align 4 + %add = add nsw i32 %1, %j.0 + %idxprom7 = sext i32 %j.0 to i64 + %arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %idxprom7 + %2 = load i32*, i32** %arrayidx8, align 8 + %idxprom9 = sext i32 %i.0 to i64 + %arrayidx10 = getelementptr inbounds i32, i32* %2, i64 %idxprom9 + store i32 %add, i32* %arrayidx10, align 4 + br label %for.inc + +for.inc: ; preds = %test6Loop2 + %inc = add nsw i32 %j.0, 1 + %cmp3 = icmp slt i32 %inc, %ny + br i1 %cmp3, label %test6Loop2, label %for.cond2.for.end_crit_edge + +for.cond2.for.end_crit_edge: ; preds = %for.inc + br label %for.end + +for.end: ; preds = %for.cond2.for.end_crit_edge, %if.then + br label %if.end + +if.end: ; preds = %for.end, %test6Loop1 + br label %for.inc11 + +for.inc11: ; preds = %if.end + %inc12 = add nsw i32 %i.0, 1 + %cmp = icmp slt i32 %inc12, %nx + br i1 %cmp, label %test6Loop1, label %for.cond.for.end13_crit_edge + +for.cond.for.end13_crit_edge: ; preds = %for.inc11 + br label %for.end13 + +for.end13: ; preds = %for.cond.for.end13_crit_edge, %entry + %arrayidx14 = getelementptr inbounds i32*, i32** %y, i64 0 + %3 = load i32*, i32** %arrayidx14, align 8 + %arrayidx15 = getelementptr inbounds i32, i32* %3, i64 0 + %4 = load i32, i32* %arrayidx15, align 4 + %conv = sitofp i32 %4 to double + ret double %conv +}