diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -721,12 +721,13 @@ if (LatchBR->getSuccessor(0) != L->getHeader()) std::swap(BackedgeTakenWeight, LatchExitWeight); - if (!BackedgeTakenWeight || !LatchExitWeight) - return 0; + if (!LatchExitWeight) + return None; - // Divide the count of the backedge by the count of the edge exiting the loop, - // rounding to nearest. - return llvm::divideNearest(BackedgeTakenWeight, LatchExitWeight); + // Trip count is a ratio of loop header block execution weight to loop exit + // weight. + uint64_t HeaderBlockWeight = BackedgeTakenWeight + LatchExitWeight; + return llvm::divideNearest(HeaderBlockWeight, LatchExitWeight); } bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop, diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll --- a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll @@ -11,7 +11,7 @@ define void @test1(i32 %k) !prof !4 { ; CHECK: Loop Unroll: F[test1] Loop %for.body ; CHECK: PEELING loop %for.body with iteration count 2! -; CHECK: PEELING loop %for.body with iteration count 4! +; CHECK: PEELING loop %for.body with iteration count 5! ; CHECK: llvm.loop.unroll.disable for.body.lr.ph: br label %for.body diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom-2.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom-2.ll --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom-2.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom-2.ll @@ -5,7 +5,7 @@ ; Regression test for setting the correct idom for exit blocks. ; CHECK: Loop Unroll: F[basic] -; CHECK: PEELING loop %for.body with iteration count 1! +; CHECK: PEELING loop %for.body with iteration count 2! define i32 @basic(i32* %p, i32 %k, i1 %c1, i1 %c2) #0 !prof !3 { entry: diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom.ll --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom.ll @@ -5,7 +5,7 @@ ; Regression test for setting the correct idom for exit blocks. ; CHECK: Loop Unroll: F[basic] -; CHECK: PEELING loop %for.body with iteration count 1! +; CHECK: PEELING loop %for.body with iteration count 2! define i32 @basic(i32* %p, i32 %k, i1 %c1, i1 %c2) #0 !prof !3 { entry: diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll @@ -8,7 +8,7 @@ ; All side exits to deopt does not change weigths. ; CHECK: Loop Unroll: F[basic] -; CHECK: PEELING loop %for.body with iteration count 3! +; CHECK: PEELING loop %for.body with iteration count 4! ; CHECK-NO-PEEL-NOT: PEELING loop %for.body ; CHECK-LABEL: @basic ; CHECK: br i1 %c, label %{{.*}}, label %side_exit, !prof !15 diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll @@ -9,7 +9,7 @@ ; from the loop, and update the branch weights for the peeled loop properly. ; CHECK: Loop Unroll: F[basic] -; CHECK: PEELING loop %for.body with iteration count 3! +; CHECK: PEELING loop %for.body with iteration count 4! ; CHECK: Loop Unroll: F[optsize] ; CHECK-NOT: PEELING diff --git a/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp b/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp --- a/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp +++ b/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp @@ -7,84 +7,184 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/AsmParser/Parser.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/SourceMgr.h" #include "gtest/gtest.h" using namespace llvm; -static std::unique_ptr parseIR(LLVMContext &C, const char *IR) { - SMDiagnostic Err; - std::unique_ptr Mod = parseAssemblyString(IR, Err, C); - if (!Mod) - Err.print("LoopUtilsTests", errs()); - return Mod; -} +namespace { +struct LoopUtilsPass : public PassInfoMixin { + template LoopUtilsPass(T &&Arg) : Func(std::forward(Arg)) {} + + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U) { + return Func(L, AM, AR, U); + } + + std::function + Func; +}; + +struct LoopUtilsTest : public testing::Test { + + PassBuilder PB; + LoopAnalysisManager LAM; + FunctionAnalysisManager FAM; + CGSCCAnalysisManager CGAM; + ModuleAnalysisManager MAM; + LLVMContext Context; + std::unique_ptr M; + + LoopUtilsTest() { + // Register all the basic analyses with the managers. + PB.registerModuleAnalyses(MAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerFunctionAnalyses(FAM); + PB.registerLoopAnalyses(LAM); + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + } + + void ParseAssembly(const char *IR) { + SMDiagnostic Error; + M = parseAssemblyString(IR, Error, Context); + + std::string errMsg; + raw_string_ostream os(errMsg); + Error.print("", os); + + // A failure here means that the test itself is buggy. + if (!M) + report_fatal_error(os.str().c_str()); + } +}; + +TEST_F(LoopUtilsTest, DeleteDeadLoopNest) { + ParseAssembly("define void @foo() {\n" + "entry:\n" + " br label %for.i\n" + "for.i:\n" + " %i = phi i64 [ 0, %entry ], [ %inc.i, %for.i.latch ]\n" + " br label %for.j\n" + "for.j:\n" + " %j = phi i64 [ 0, %for.i ], [ %inc.j, %for.j ]\n" + " %inc.j = add nsw i64 %j, 1\n" + " %cmp.j = icmp slt i64 %inc.j, 100\n" + " br i1 %cmp.j, label %for.j, label %for.k.preheader\n" + "for.k.preheader:\n" + " br label %for.k\n" + "for.k:\n" + " %k = phi i64 [ %inc.k, %for.k ], [ 0, %for.k.preheader ]\n" + " %inc.k = add nsw i64 %k, 1\n" + " %cmp.k = icmp slt i64 %inc.k, 100\n" + " br i1 %cmp.k, label %for.k, label %for.i.latch\n" + "for.i.latch:\n" + " %inc.i = add nsw i64 %i, 1\n" + " %cmp.i = icmp slt i64 %inc.i, 100\n" + " br i1 %cmp.i, label %for.i, label %for.end\n" + "for.end:\n" + " ret void\n" + "}\n"); + + + LoopPassManager LPM; + LPM.addPass(LoopUtilsPass([=](Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) -> PreservedAnalyses { + auto &F = *L.getHeader()->getParent(); + auto &SE = AR.SE; + auto &LI = AR.LI; + auto &DT = AR.DT; + + std::string LoopName = L.getName(); + if (LoopName == "for.i") { + assert(LI.begin() != LI.end() && "Expecting loops in function F"); -static void run(Module &M, StringRef FuncName, - function_ref - Test) { - Function *F = M.getFunction(FuncName); - DominatorTree DT(*F); - TargetLibraryInfoImpl TLII; - TargetLibraryInfo TLI(TLII); - AssumptionCache AC(*F); - LoopInfo LI(DT); - ScalarEvolution SE(*F, TLI, AC, DT, LI); - Test(*F, DT, SE, LI); + deleteDeadLoop(&L, &DT, &SE, &LI); + + assert(DT.verify(DominatorTree::VerificationLevel::Fast) && + "Expecting valid dominator tree"); + LI.verify(DT); + assert(LI.begin() == LI.end() && "Expecting no loops left in function F"); + SE.verify(); + + Function::iterator FI = F.begin(); + BasicBlock *Entry = &*(FI++); + assert(Entry->getName() == "entry" && "Expecting BasicBlock entry"); + const BranchInst *BI = dyn_cast(Entry->getTerminator()); + assert(BI && "Expecting valid branch instruction"); + EXPECT_EQ(BI->getNumSuccessors(), (unsigned)1); + EXPECT_EQ(BI->getSuccessor(0)->getName(), "for.end"); + + U.markLoopAsDeleted(L, LoopName); + return PreservedAnalyses::none(); + } + return PreservedAnalyses::all(); + })); + + FunctionPassManager FPM; + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); + + ModulePassManager MPM; + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + + MPM.run(*M, MAM); } -TEST(LoopUtils, DeleteDeadLoopNest) { - LLVMContext C; - std::unique_ptr M = - parseIR(C, "define void @foo() {\n" - "entry:\n" - " br label %for.i\n" - "for.i:\n" - " %i = phi i64 [ 0, %entry ], [ %inc.i, %for.i.latch ]\n" - " br label %for.j\n" - "for.j:\n" - " %j = phi i64 [ 0, %for.i ], [ %inc.j, %for.j ]\n" - " %inc.j = add nsw i64 %j, 1\n" - " %cmp.j = icmp slt i64 %inc.j, 100\n" - " br i1 %cmp.j, label %for.j, label %for.k.preheader\n" - "for.k.preheader:\n" - " br label %for.k\n" - "for.k:\n" - " %k = phi i64 [ %inc.k, %for.k ], [ 0, %for.k.preheader ]\n" - " %inc.k = add nsw i64 %k, 1\n" - " %cmp.k = icmp slt i64 %inc.k, 100\n" - " br i1 %cmp.k, label %for.k, label %for.i.latch\n" - "for.i.latch:\n" - " %inc.i = add nsw i64 %i, 1\n" - " %cmp.i = icmp slt i64 %inc.i, 100\n" - " br i1 %cmp.i, label %for.i, label %for.end\n" - "for.end:\n" - " ret void\n" - "}\n"); - - run(*M, "foo", - [&](Function &F, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI) { - assert(LI.begin() != LI.end() && "Expecting loops in function F"); - Loop *L = *LI.begin(); - assert(L && L->getName() == "for.i" && "Expecting loop for.i"); - - deleteDeadLoop(L, &DT, &SE, &LI); - - assert(DT.verify(DominatorTree::VerificationLevel::Fast) && - "Expecting valid dominator tree"); - LI.verify(DT); - assert(LI.begin() == LI.end() && - "Expecting no loops left in function F"); - SE.verify(); - - Function::iterator FI = F.begin(); - BasicBlock *Entry = &*(FI++); - assert(Entry->getName() == "entry" && "Expecting BasicBlock entry"); - const BranchInst *BI = dyn_cast(Entry->getTerminator()); - assert(BI && "Expecting valid branch instruction"); - EXPECT_EQ(BI->getNumSuccessors(), (unsigned)1); - EXPECT_EQ(BI->getSuccessor(0)->getName(), "for.end"); - }); +TEST_F(LoopUtilsTest, getLoopEstimatedTripCountTest) { + ParseAssembly(R"IR( + @a = dso_local global [1024 x i32] zeroinitializer, align 16 + @b = dso_local global [1024 x i32] zeroinitializer, align 16 + define dso_local void @_Z3foov() { + entry: + br label %for.body + for.cond.cleanup: ; preds = %for.body + ret void + for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @b, i64 0, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %1 = trunc i64 %indvars.iv to i32 + %mul = mul nsw i32 %0, %1 + %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @a, i64 0, i64 %indvars.iv + %2 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %2, %mul + store i32 %add, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.cond.cleanup, label %for.body, !prof !1} + + !1 = !{!"branch_weights", i32 1, i32 15 })IR"); + + + LoopPassManager LPM; + LPM.addPass(LoopUtilsPass([=](Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U) -> PreservedAnalyses { + auto TC = getLoopEstimatedTripCount(&L); + EXPECT_TRUE(TC.hasValue()); + EXPECT_EQ(*TC, (unsigned)16); + return PreservedAnalyses::all(); + })); + + FunctionPassManager FPM; + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); + + ModulePassManager MPM; + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + + MPM.run(*M, MAM); +} }